[llvm] 591371f - AMDGPU: Regenerate some mir test checks with -NEXT

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 18 07:49:02 PST 2021


Author: Matt Arsenault
Date: 2021-12-18T10:46:15-05:00
New Revision: 591371f7df3cee0429c85f2aeb4a848df8888386

URL: https://github.com/llvm/llvm-project/commit/591371f7df3cee0429c85f2aeb4a848df8888386
DIFF: https://github.com/llvm/llvm-project/commit/591371f7df3cee0429c85f2aeb4a848df8888386.diff

LOG: AMDGPU: Regenerate some mir test checks with -NEXT

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir
index faf99d57a5ea7..b6711b4a5e838 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-anyext.mir
@@ -9,7 +9,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s32>) = G_ANYEXT %1
@@ -24,11 +24,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s16_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s64>) = G_ANYEXT %1
@@ -43,8 +43,8 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_trunc_v2s32_to_v2s8_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<2 x s16>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_TRUNC %0
     %2:_(<2 x s16>) = G_ANYEXT %1
@@ -59,7 +59,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_trunc_v3s32_to_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>) = G_TRUNC %0
     %2:_(<3 x s32>) = G_ANYEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
index 12f296573ce93..cc68346462c95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
@@ -13,15 +13,16 @@ body:             |
 
     ; GFX9-LABEL: name: revisit_build_vector_unmerge_user
     ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32)
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32)
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_CONSTANT i32 2
     %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir
index 9679bc4af52a4..b528ef41ce007 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-concat-vectors.mir
@@ -13,25 +13,26 @@ body:             |
 
     ; GFX9-LABEL: name: revisit_concat_vectors_unmerge_user
     ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C3]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C3]](s32)
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(<4 x s8>) = G_BITCAST %0
     %2:_(s16) = G_CONSTANT i16 2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
index fb2e6984804c3..b7ec21b576af4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
@@ -8,8 +8,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -24,8 +24,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -40,8 +40,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -56,8 +56,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -72,8 +72,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -88,8 +88,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset32
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -104,8 +104,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -120,8 +120,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset96
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -137,9 +137,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset18
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -156,9 +156,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset82
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -176,10 +176,10 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset32
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s128), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s128), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -196,10 +196,10 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s16_merge_s32_s32_offset1
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[MV]](s64), 1
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[MV]](s64), 1
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -217,8 +217,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_CONSTANT i32 1
@@ -234,8 +234,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_CONSTANT i32 1
@@ -251,11 +251,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_CONSTANT i32 1
@@ -271,11 +271,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_CONSTANT i32 1
@@ -292,8 +292,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -308,8 +308,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -324,10 +324,10 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset32
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -343,8 +343,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s32_build_vector_v2s64_s64_s64_offset64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -362,8 +362,8 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>)
-    ; CHECK: $vgpr0 = COPY [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -379,8 +379,8 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>)
-    ; CHECK: $vgpr0 = COPY [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -397,8 +397,8 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -415,10 +415,10 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -436,9 +436,9 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[COPY]](<2 x s16>), 16
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[COPY]](<2 x s16>), 16
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
index 2b5ce8e3f2844..689615ca56296 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
@@ -9,11 +9,11 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s32>) = G_SEXT %1
@@ -28,13 +28,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s16_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT_INREG]](s64), [[SEXT_INREG1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT1]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT_INREG]](s64), [[SEXT_INREG1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s64>) = G_SEXT %1
@@ -51,17 +51,17 @@ body: |
     ; unable to legalize. This prevents further legalization.
     ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 8
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 8
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 8
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_TRUNC %0
     %2:_(<2 x s16>) = G_SEXT %1
@@ -76,12 +76,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_trunc_v3s32_to_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 16
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>) = G_TRUNC %0
     %2:_(<3 x s32>) = G_SEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
index 2c3d0c49639af..5d03021cf02f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
@@ -10,7 +10,7 @@ body: |
     ; of the merge source
     ; CHECK-LABEL: name: trunc_s16_merge_s64_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -27,7 +27,7 @@ body: |
     ; Test that trunc(merge) with trunc-size == merge-source-size is eliminated
     ; CHECK-LABEL: name: trunc_s32_merge_s64_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -44,9 +44,9 @@ body: |
     ; smaller merge
     ; CHECK-LABEL: name: trunc_s64_merge_s128_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1
@@ -62,10 +62,10 @@ body: |
     ; Test that trunc(merge) with a non-scalar merge source is not combined
     ; CHECK-LABEL: name: trunc_s32_merge_s128_p0
     ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(p0) = G_CONSTANT i64 0
     %1:_(p0) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -81,10 +81,10 @@ body: |
     ; Test that trunc(merge) with a non-scalar merge source is not combined
     ; CHECK-LABEL: name: trunc_s64_merge_s128_p0
     ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[TRUNC]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[TRUNC]](s64)
     %0:_(p0) = G_CONSTANT i64 0
     %1:_(p0) = G_CONSTANT i64 1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -100,10 +100,10 @@ body: |
     ; Test that trunc(merge) with a non-scalar merge source is not combined
     ; CHECK-LABEL: name: trunc_s128_merge_s192_p0
     ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[C]](p0), [[C1]](p0), [[C]](p0)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[MV]](s192)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](s128)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[C]](p0), [[C1]](p0), [[C]](p0)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[MV]](s192)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](s128)
     %0:_(p0) = G_CONSTANT i64 0
     %1:_(p0) = G_CONSTANT i64 1
     %2:_(s192) = G_MERGE_VALUES %0, %1, %0
@@ -121,7 +121,7 @@ body: |
     ; have been combined.
     ; CHECK-LABEL: name: trunc_s68_merge_s128_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1
@@ -138,8 +138,8 @@ body: |
     ; Test that trunc(trunc) is combined to a single trunc
     ; CHECK-LABEL: name: trunc_trunc
     ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(s64) = G_IMPLICIT_DEF
     %1:_(s48) = G_TRUNC %0
     %2:_(s32) = G_TRUNC %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
index fdfd061c5d70c..2a7db868d0052 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -7,17 +7,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -44,17 +44,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32_extra_copies
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -81,17 +81,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_sext_v2s32_of_build_vector_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ANYEXT]](s16)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ANYEXT1]](s16)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ANYEXT]](s16)
+    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ANYEXT1]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -114,17 +114,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_zext_v2s32_of_build_vector_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -147,15 +147,15 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_anyext_v2s32_of_build_vector_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -179,22 +179,22 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_v2s16_zext_v4s32_of_build_vector_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT1]](s32)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT2]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT1]](s32)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT2]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ZEXT3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -216,17 +216,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s1_trunc_v4s1_of_concat_vectors_v4s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1
-    ; CHECK: $vgpr0 = COPY [[SEXT_INREG]](s32)
-    ; CHECK: $vgpr1 = COPY [[SEXT_INREG1]](s32)
-    ; CHECK: $vgpr2 = COPY [[SEXT_INREG2]](s32)
-    ; CHECK: $vgpr3 = COPY [[SEXT_INREG3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1
+    ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[SEXT_INREG2]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[SEXT_INREG3]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -248,17 +248,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s16_of_concat_vectors_v2s16_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -272,10 +272,10 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr1_vgpr2
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -289,10 +289,10 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v2s64_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32), implicit [[UV4]](s32), implicit [[UV5]](s32), implicit [[UV6]](s32), implicit [[UV7]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32), implicit [[UV4]](s32), implicit [[UV5]](s32), implicit [[UV6]](s32), implicit [[UV7]](s32)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
@@ -306,14 +306,14 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
@@ -328,14 +328,14 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s64_of_sext_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
-    ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
-    ; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT]](s64), implicit [[SEXT1]](s64), implicit [[SEXT2]](s64), implicit [[SEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -350,14 +350,14 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s64_of_zext_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64), implicit [[ZEXT1]](s64), implicit [[ZEXT2]](s64), implicit [[ZEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -372,14 +372,14 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s64_of_anyext_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ANYEXT]](s64), implicit [[ANYEXT1]](s64), implicit [[ANYEXT2]](s64), implicit [[ANYEXT3]](s64)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -394,29 +394,29 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s8_of_trunc_v4s16_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR3]](s32)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR4]](s32)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
-    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR5]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8), implicit [[TRUNC4]](s8), implicit [[TRUNC5]](s8), implicit [[TRUNC6]](s8), implicit [[TRUNC7]](s8), implicit [[TRUNC8]](s8), implicit [[TRUNC9]](s8)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR3]](s32)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR4]](s32)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR5]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8), implicit [[TRUNC4]](s8), implicit [[TRUNC5]](s8), implicit [[TRUNC6]](s8), implicit [[TRUNC7]](s8), implicit [[TRUNC8]](s8), implicit [[TRUNC9]](s8)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -431,17 +431,17 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s16_of_anyext_v4s64_concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -457,18 +457,18 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_of_concat_vectors_v4s32_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY2]](<2 x s32>)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY3]](<2 x s32>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC3]](<2 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32), implicit [[BITCAST2]](s32), implicit [[BITCAST3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY2]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY3]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC2]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC3]](<2 x s16>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32), implicit [[BITCAST2]](s32), implicit [[BITCAST3]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -485,16 +485,16 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s64_of_build_vector_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s32), implicit [[BITCAST1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -518,20 +518,20 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s128_of_zext_of_concat_vectors
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -547,11 +547,11 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_v3s32_of_v12s32_concat_vectors_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](<3 x s32>), implicit [[UV1]](<3 x s32>), implicit [[UV2]](<3 x s32>), implicit [[UV3]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>), implicit [[UV1]](<3 x s32>), implicit [[UV2]](<3 x s32>), implicit [[UV3]](<3 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
@@ -566,34 +566,34 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_v3s16_of_v12s16_concat_vectors_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
-    ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32)
-    ; CHECK: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-    ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>), implicit [[BUILD_VECTOR1]](<3 x s32>), implicit [[BUILD_VECTOR2]](<3 x s32>), implicit [[BUILD_VECTOR3]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32)
+    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>), implicit [[BUILD_VECTOR1]](<3 x s32>), implicit [[BUILD_VECTOR2]](<3 x s32>), implicit [[BUILD_VECTOR3]](<3 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s16>) = COPY $vgpr4_vgpr5
@@ -615,27 +615,28 @@ body:             |
 
     ; CHECK-LABEL: name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
+    ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -661,45 +662,46 @@ body:             |
 
     ; CHECK-LABEL: name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
-    ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
-    ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]]
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
+    ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
+    ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
+    ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -733,81 +735,82 @@ body:             |
 
     ; CHECK-LABEL: name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
-    ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
-    ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]]
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 8
-    ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 8
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG8]], [[C]]
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG9]], [[C]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
-    ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG10]], [[C]]
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG11]], [[C]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CHECK: [[SEXT_INREG12:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8
-    ; CHECK: [[SEXT_INREG13:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8
-    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG12]], [[C]]
-    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG13]], [[C]]
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; CHECK: [[SEXT_INREG14:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 8
-    ; CHECK: [[SEXT_INREG15:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY15]], 8
-    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG14]], [[C]]
-    ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG15]], [[C]]
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]]
-    ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>), implicit [[BITCAST4]](<2 x s16>), implicit [[BITCAST5]](<2 x s16>), implicit [[BITCAST6]](<2 x s16>), implicit [[BITCAST7]](<2 x s16>)
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
+    ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
+    ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
+    ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG6]], [[C]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG7]], [[C]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 8
+    ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 8
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG8]], [[C]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG9]], [[C]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
+    ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG10]], [[C]]
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG11]], [[C]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG12:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8
+    ; CHECK-NEXT: [[SEXT_INREG13:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG12]], [[C]]
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG13]], [[C]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG14:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 8
+    ; CHECK-NEXT: [[SEXT_INREG15:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY15]], 8
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG14]], [[C]]
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG15]], [[C]]
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]]
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>), implicit [[BITCAST4]](<2 x s16>), implicit [[BITCAST5]](<2 x s16>), implicit [[BITCAST6]](<2 x s16>), implicit [[BITCAST7]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -855,10 +858,10 @@ body:             |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
     ; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s96_of_merge_values_s192_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -876,20 +879,20 @@ body:             |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
     ; CHECK-LABEL: name: test_unmerge_values_s16_trunc_s96_of_merge_values_s192_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -907,24 +910,24 @@ body:             |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; CHECK-LABEL: name: test_unmerge_values_s16_trunc_s96_of_merge_values_s192_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s192), implicit [[MV1]](s96), implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192), implicit [[MV1]](s96), implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -944,11 +947,11 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
-    ; CHECK: $vgpr2_vgpr3 = COPY [[DEF]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[DEF]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -965,9 +968,9 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -985,12 +988,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s8_v4s8_trunc_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
@@ -1006,10 +1009,10 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1
@@ -1027,10 +1030,10 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v4s8_v8s8_trunc_v8s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV]](<4 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV1]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<4 x s8>), implicit [[TRUNC1]](<4 x s8>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[UV1]](<4 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<4 x s8>), implicit [[TRUNC1]](<4 x s8>)
     %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<8 x s8>) = G_TRUNC %0
     %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %1
@@ -1047,12 +1050,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s16_v4s16_trunc_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s16>) = G_TRUNC %0
     %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1
@@ -1068,10 +1071,10 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s16_v4s16_trunc_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s16>) = G_TRUNC %0
     %2:_(<2 x s16>), %3:_(<2 x s16>) = G_UNMERGE_VALUES %1
@@ -1087,12 +1090,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s16_v8s16_trunc_v8s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>), implicit [[TRUNC2]](<2 x s16>), implicit [[TRUNC3]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>), implicit [[TRUNC2]](<2 x s16>), implicit [[TRUNC3]](<2 x s16>)
     %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<8 x s16>) = G_TRUNC %0
     %2:_(<2 x s16>), %3:_(<2 x s16>), %4:_(<2 x s16>), %5:_(<2 x s16>) = G_UNMERGE_VALUES %1
@@ -1108,16 +1111,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v4s16_v8s16_trunc_v8s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>)
     %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<8 x s16>) = G_TRUNC %0
     %2:_(<4 x s16>), %3:_(<4 x s16>) = G_UNMERGE_VALUES %1
@@ -1133,17 +1136,17 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s8_v4s8_trunc_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8), implicit [[TRUNC1]](s8), implicit [[TRUNC2]](s8), implicit [[TRUNC3]](s8)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
@@ -1159,8 +1162,8 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](<2 x s16>), implicit [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](<2 x s16>), implicit [[UV1]](<2 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1
@@ -1178,12 +1181,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s32_v4s32_trunc_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32), implicit [[TRUNC3]](s32)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<4 x s32>) = G_TRUNC %0
     %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1
@@ -1199,16 +1202,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s32_v4s32_trunc_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<4 x s32>) = G_TRUNC %0
     %2:_(<2 x s32>), %3:_(<2 x s32>) = G_UNMERGE_VALUES %1
@@ -1224,12 +1227,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s16_v4s16_trunc_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s64)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s64)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s64)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s64)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s64)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<4 x s16>) = G_TRUNC %0
     %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1
@@ -1245,26 +1248,26 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_v2s16_v4s16_trunc_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<4 x s16>) = G_TRUNC %0
     %2:_(<2 x s16>), %3:_(<2 x s16>) = G_UNMERGE_VALUES %1
@@ -1280,23 +1283,23 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s16_from_v3s16_from_v6s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
   %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
   %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
   %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1
@@ -1313,21 +1316,21 @@ body:             |
 
     ; CHECK-LABEL: name: test_unmerge_values_s16_from_v3s16_from_v6s16_other_def_use
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[BUILD_VECTOR]](<3 x s32>)
   %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
   %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
   %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
index dace15bbdc77a..71eee774acbe7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -9,10 +9,10 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s32>) = G_ZEXT %1
@@ -27,14 +27,14 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s16_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_TRUNC %0
     %2:_(<2 x s64>) = G_ZEXT %1
@@ -49,15 +49,15 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]]
-    ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_TRUNC %0
     %2:_(<2 x s16>) = G_ZEXT %1
@@ -72,19 +72,19 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>) = G_TRUNC %0
     %2:_(<3 x s32>) = G_ZEXT %1
@@ -101,13 +101,13 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_128_trunc_s128_merge
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr0_vgpr1
     %2:_(s128) = G_MERGE_VALUES %0, %1
@@ -124,12 +124,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -146,12 +146,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s1_to_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -168,11 +168,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s8_to_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1)
     %2:_(s16) = G_SEXT %1
@@ -188,20 +188,20 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s1_to_v2s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1
@@ -218,20 +218,20 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s1_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1
@@ -248,16 +248,16 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s8_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1)
     %2:_(<2 x s16>) = G_SEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
index a4070498dafd9..f7bf2b25e95bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -26,16 +26,16 @@ body: |
 
     ; GFX10-LABEL: name: value_finder_bug
     ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
-    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
-    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+    ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+    ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -58,16 +58,16 @@ body: |
 
     ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine
     ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
-    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
-    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+    ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+    ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
index 4b06a7c5405b1..ffbf3f55dfb27 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_ANYEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -24,8 +24,8 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s16_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s64) = G_ANYEXT %1
@@ -40,7 +40,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s16_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s32) = G_ANYEXT %1
@@ -55,7 +55,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s24_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s24) = G_TRUNC %0
     %2:_(s32) = G_ANYEXT %1
@@ -69,7 +69,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s1_to_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s1) = G_CONSTANT i1 0
     %1:_(s32) = G_ANYEXT %0
     $vgpr0 = COPY %1
@@ -82,7 +82,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s1_to_s64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64)
     %0:_(s1) = G_CONSTANT i1 0
     %1:_(s64) = G_ANYEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -96,11 +96,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s32>) = G_ANYEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -114,16 +114,16 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<3 x s32>) = G_ANYEXT %1
@@ -138,7 +138,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v4s16_to_v4s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>)
     %0:_(<4 x s16>) = G_IMPLICIT_DEF
     %1:_(<4 x s32>) = G_ANYEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -152,11 +152,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v2s32_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_ANYEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -170,12 +170,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v3s32_to_v3s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s64>) = G_ANYEXT %0
     S_NOP 0, implicit %1
@@ -190,13 +190,13 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_v4s32_to_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s64>) = G_ANYEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -210,8 +210,8 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s8_to_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s16) = G_ANYEXT %1
@@ -226,8 +226,8 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s8_to_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s24)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s24) = G_ANYEXT %1
@@ -242,7 +242,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s7_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: S_ENDPGM 0, implicit [[COPY]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s7) = G_TRUNC %0
     %2:_(s32) = G_ANYEXT %1
@@ -257,7 +257,7 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s8_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: S_ENDPGM 0, implicit [[COPY]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s32) = G_ANYEXT %1
@@ -272,12 +272,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s96
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s96)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96)
     %0:_(s32) = COPY $vgpr0
     %1:_(s96) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -291,11 +291,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s128)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s128)
     %0:_(s32) = COPY $vgpr0
     %1:_(s128) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -309,12 +309,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s160
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160)
     %0:_(s32) = COPY $vgpr0
     %1:_(s160) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -328,11 +328,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s192)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s192)
     %0:_(s32) = COPY $vgpr0
     %1:_(s192) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -346,12 +346,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s224
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -365,11 +365,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s256)
     %0:_(s32) = COPY $vgpr0
     %1:_(s256) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -383,11 +383,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s512)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s512)
     %0:_(s32) = COPY $vgpr0
     %1:_(s512) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -401,12 +401,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s992
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -420,11 +420,11 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s1024)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s1024)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1024) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -438,9 +438,9 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s64_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -454,9 +454,9 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s64_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s192)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s192) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -470,9 +470,9 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s64_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -486,9 +486,9 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s64_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s512) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -502,9 +502,9 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s64_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s1024) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -518,12 +518,12 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s96_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s128) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -537,10 +537,10 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s128_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s256) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -554,45 +554,45 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s32_to_s88
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16)
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC5]](s88)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16)
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](s88)
     %0:_(s32) = COPY $vgpr0
     %1:_(s88) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -620,21 +620,21 @@ body: |
 
     ; CHECK-LABEL: name: test_anyext_s2_to_s112
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV1]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s112)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV1]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
     %0:_(s32) = COPY $vgpr0
     %1:_(s2) = G_TRUNC %0
     %2:_(s112) = G_ANYEXT %1
@@ -648,7 +648,7 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-LABEL: name: test_anyext_s112_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: S_ENDPGM 0, implicit [[COPY]](s128)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s112) = G_TRUNC %0
     %2:_(s128) = G_ANYEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
index 9230cee0f2c51..2a411ba18dca0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
@@ -9,12 +9,12 @@ body: |
 
     ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_global
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
   %0:_(p1) = COPY $vgpr0_vgpr1
   %1:_(s32) = COPY $vgpr2
   %2:_(s32) = COPY $vgpr3
@@ -31,12 +31,12 @@ body: |
 
     ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_flat
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32))
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32))
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
   %0:_(p0) = COPY $vgpr0_vgpr1
   %1:_(s32) = COPY $vgpr2
   %2:_(s32) = COPY $vgpr3
@@ -53,11 +53,11 @@ body: |
 
     ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_local
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
   %0:_(p3) = COPY $vgpr0
   %1:_(s32) = COPY $vgpr1
   %2:_(s32) = COPY $vgpr2
@@ -74,12 +74,12 @@ body: |
 
     ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s64_global
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
   %0:_(p1) = COPY $vgpr0_vgpr1
   %1:_(s64) = COPY $vgpr2_vgpr3
   %2:_(s64) = COPY $vgpr4_vgpr5
@@ -96,11 +96,11 @@ body: |
 
     ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s64_local
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
+    ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
   %0:_(p3) = COPY $vgpr0
   %1:_(s64) = COPY $vgpr1_vgpr2
   %2:_(s64) = COPY $vgpr3_vgpr4

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
index b12cc3e5babc3..083828075c44d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
@@ -10,9 +10,9 @@ body: |
     liveins: $sgpr0, $sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomic_cmpxchg_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = COPY $sgpr2
@@ -27,9 +27,9 @@ body: |
     liveins: $sgpr0, $sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomic_cmpxchg_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s64), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s64), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = COPY $sgpr2
@@ -44,10 +44,10 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
     ; CHECK-LABEL: name: atomic_cmpxchg_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = COPY $sgpr3
@@ -62,10 +62,10 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
     ; CHECK-LABEL: name: atomic_cmpxchg_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s64), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s64), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = COPY $sgpr3
@@ -81,10 +81,10 @@ body: |
 
     ; CHECK-LABEL: name: atomic_cmpxchg_flat_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s32))
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s32))
     %0:_(p0) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = COPY $sgpr3
@@ -100,10 +100,10 @@ body: |
 
     ; CHECK-LABEL: name: atomic_cmpxchg_flat_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
-    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s64))
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s64))
     %0:_(p0) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = COPY $sgpr3

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir
index c4139fe42617c..12ba5006e6723 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_add_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_add_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_add_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_add_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir
index 6c169313c8932..3cb55a5de4567 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_and_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_and_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_and_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_and_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
index 8517e0f62a3ea..195ab02571bfd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
@@ -14,8 +14,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_fadd_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
index 958faafc9cc6a..0d3ee3f69ab2f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
@@ -14,8 +14,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_fadd_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir
index 13f1c40edd7b0..6764744db5bcd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_max_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_max_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_max_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_max_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir
index 8077e22156993..48d556a18fe96 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_min_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_min_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_min_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_min_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir
index 4658120255286..d6522fcdc184f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_or_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_or_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_or_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_or_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir
index 0c9c79a7cfa77..1104868d3ebd0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_sub_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_sub_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_sub_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_sub_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir
index aa5954800113d..9cedf4d044399 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_umax_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_umax_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_umax_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_umax_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir
index 0c09a67cb76ec..300b389b3c7f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_umin_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_umin_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_umin_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_umin_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir
index 7891e2f9e469f..adaaa5926f226 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir
@@ -13,8 +13,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xchg_flat_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
     %0:_(p0) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0)
@@ -28,8 +28,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xchg_flat_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
     %0:_(p0) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir
index 567d7c856052f..5cd6ec5d45219 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xchg_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_xchg_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xchg_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_xchg_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir
index 06781a0a2e520..aba855de234eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir
@@ -10,8 +10,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xor_global_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -25,8 +25,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_xor_local_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3)
@@ -40,8 +40,8 @@ body: |
     liveins: $sgpr0_sgpr1, $sgpr2
     ; CHECK-LABEL: name: atomicrmw_xor_global_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1)
@@ -55,8 +55,8 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; CHECK-LABEL: name: atomicrmw_xor_local_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
-    ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
index 25d54ebffa629..78f2b0d8c34ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
@@ -10,9 +10,9 @@ body: |
 
     ; GFX9-LABEL: name: legal_s32_to_v2s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
index 5b53a8c342644..0491c7050fd16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir
@@ -8,9 +8,9 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: legal_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
@@ -23,10 +23,10 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-LABEL: name: legal_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -40,11 +40,11 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: legal_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -59,12 +59,12 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
     ; CHECK-LABEL: name: legal_v5s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -80,13 +80,13 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; CHECK-LABEL: name: legal_v6s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -103,14 +103,14 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
     ; CHECK-LABEL: name: legal_v7s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -128,15 +128,15 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; CHECK-LABEL: name: legal_v8s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -155,16 +155,16 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
     ; CHECK-LABEL: name: legal_v9s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<9 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<9 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -184,17 +184,17 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
     ; CHECK-LABEL: name: legal_v10s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<10 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<10 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -215,18 +215,18 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
     ; CHECK-LABEL: name: legal_v11s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<11 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<11 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -248,19 +248,19 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
     ; CHECK-LABEL: name: legal_v12s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<12 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<12 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -283,20 +283,20 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
     ; CHECK-LABEL: name: legal_v13s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<13 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<13 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<13 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<13 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -320,21 +320,21 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13
     ; CHECK-LABEL: name: legal_v14s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<14 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<14 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<14 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<14 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -359,22 +359,22 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
     ; CHECK-LABEL: name: legal_v15s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<15 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<15 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<15 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<15 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -400,23 +400,23 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
     ; CHECK-LABEL: name: legal_v16s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -443,39 +443,39 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
     ; CHECK-LABEL: name: legal_v32s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-    ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-    ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-    ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-    ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-    ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-    ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-    ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-    ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<32 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+    ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+    ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+    ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+    ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+    ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+    ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+    ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+    ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+    ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<32 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -518,9 +518,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: legal_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -533,10 +533,10 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
     ; CHECK-LABEL: name: legal_v3s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -550,11 +550,11 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
     ; CHECK-LABEL: name: legal_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -569,12 +569,12 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9
     ; CHECK-LABEL: name: legal_v5s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<5 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -590,13 +590,13 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11
     ; CHECK-LABEL: name: legal_v6s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<6 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -613,14 +613,14 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13
     ; CHECK-LABEL: name: legal_v7s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<7 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -638,15 +638,15 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15
     ; CHECK-LABEL: name: legal_v8s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -666,23 +666,23 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18_vgpr19, $vgpr20_vgpr21, $vgpr22_vgpr23, $vgpr24_vgpr25, $vgpr26_vgpr27, $vgpr28_vgpr29, $vgpr30_vgpr31
     ; CHECK-LABEL: name: legal_v16s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $vgpr16_vgpr17
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s64) = COPY $vgpr18_vgpr19
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s64) = COPY $vgpr20_vgpr21
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY $vgpr22_vgpr23
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY $vgpr24_vgpr25
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY $vgpr26_vgpr27
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s64) = COPY $vgpr28_vgpr29
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s64) = COPY $vgpr30_vgpr31
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64), [[COPY9]](s64), [[COPY10]](s64), [[COPY11]](s64), [[COPY12]](s64), [[COPY13]](s64), [[COPY14]](s64), [[COPY15]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY $vgpr16_vgpr17
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY $vgpr18_vgpr19
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY $vgpr20_vgpr21
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY $vgpr22_vgpr23
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY $vgpr24_vgpr25
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY $vgpr26_vgpr27
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY $vgpr28_vgpr29
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY $vgpr30_vgpr31
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64), [[COPY9]](s64), [[COPY10]](s64), [[COPY11]](s64), [[COPY12]](s64), [[COPY13]](s64), [[COPY14]](s64), [[COPY15]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5
@@ -711,9 +711,9 @@ body: |
 
     ; CHECK-LABEL: name: legal_v2s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s128>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s128>)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s128>) = G_BUILD_VECTOR %0, %1
@@ -727,9 +727,9 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: legal_v2p3
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p3>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p3>)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = COPY $vgpr1
     %2:_(<2 x p3>) = G_BUILD_VECTOR %0, %1
@@ -742,10 +742,10 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-LABEL: name: legal_v3p3
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3), [[COPY2]](p3)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x p3>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3), [[COPY2]](p3)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x p3>)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = COPY $vgpr1
     %2:_(p3) = COPY $vgpr2
@@ -760,9 +760,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: legal_v2p0
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p0>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p0>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     %2:_(<2 x p0>) = G_BUILD_VECTOR %0, %1
@@ -776,9 +776,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: legal_v2p999
     ; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p999>) = G_BUILD_VECTOR [[COPY]](p999), [[COPY1]](p999)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p999>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p999>) = G_BUILD_VECTOR [[COPY]](p999), [[COPY1]](p999)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x p999>)
     %0:_(p999) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     %2:_(<2 x p999>) = G_BUILD_VECTOR %0, %1
@@ -793,9 +793,9 @@ body: |
 
     ; CHECK-LABEL: name: legal_v2s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s256>) = G_BUILD_VECTOR [[COPY]](s256), [[COPY1]](s256)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s256>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s256>) = G_BUILD_VECTOR [[COPY]](s256), [[COPY1]](s256)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s256>)
     %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %2:_(<2 x s256>) = G_BUILD_VECTOR %0, %1
@@ -810,11 +810,11 @@ body: |
 
     ; CHECK-LABEL: name: legal_v4s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s128) = COPY $vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128), [[COPY2]](s128), [[COPY3]](s128)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s128>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s128) = COPY $vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s128>) = G_BUILD_VECTOR [[COPY]](s128), [[COPY1]](s128), [[COPY2]](s128), [[COPY3]](s128)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s128>)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
index 416f83fafce4e..6fe026b37bf87 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
@@ -12,20 +12,20 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v2s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: build_vector_v2s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -42,36 +42,36 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v3s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: build_vector_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -91,32 +91,32 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v4s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: build_vector_v4s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -137,52 +137,52 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v5s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX78: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>)
     ; GFX9-LABEL: name: build_vector_v5s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<10 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -206,68 +206,68 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v7s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
-    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
-    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX78: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; GFX78: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
-    ; GFX78: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX78: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; GFX78: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
-    ; GFX78: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
-    ; GFX78: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; GFX78: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; GFX78: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
+    ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
+    ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>)
     ; GFX9-LABEL: name: build_vector_v7s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<14 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -295,52 +295,52 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v8s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
-    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
-    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
-    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
+    ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
+    ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
     ; GFX9-LABEL: name: build_vector_v8s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -369,92 +369,92 @@ body: |
 
     ; GFX78-LABEL: name: build_vector_v16s16
     ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; GFX78: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; GFX78: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; GFX78: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; GFX78: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-    ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
-    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
-    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
-    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]]
-    ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]]
-    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX78: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; GFX78: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
-    ; GFX78: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
-    ; GFX78: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
-    ; GFX78: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX78: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; GFX78: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
-    ; GFX78: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
-    ; GFX78: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; GFX78: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; GFX78: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; GFX78: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
-    ; GFX78: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
-    ; GFX78: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32)
-    ; GFX78: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]]
-    ; GFX78: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
-    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>)
+    ; GFX78-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX78-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX78-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX78-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX78-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX78-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX78-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; GFX78-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; GFX78-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; GFX78-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; GFX78-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; GFX78-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; GFX78-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; GFX78-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; GFX78-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+    ; GFX78-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX78-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; GFX78-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX78-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX78-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX78-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX78-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX78-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX78-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX78-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX78-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; GFX78-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
+    ; GFX78-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX78-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; GFX78-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
+    ; GFX78-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX78-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]]
+    ; GFX78-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]]
+    ; GFX78-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX78-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+    ; GFX78-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+    ; GFX78-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX78-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; GFX78-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+    ; GFX78-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+    ; GFX78-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; GFX78-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; GFX78-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+    ; GFX78-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+    ; GFX78-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32)
+    ; GFX78-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]]
+    ; GFX78-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; GFX78-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX78-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>)
     ; GFX9-LABEL: name: build_vector_v16s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+    ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+    ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+    ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+    ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+    ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+    ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
index 125fb384384e4..792d064567e68 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
@@ -9,9 +9,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: concat_vectors_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -26,9 +26,9 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: concat_vectors_v2s16_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -43,11 +43,11 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: concat_vectors_v2s16_v2s16_v2s16_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = COPY $vgpr2
@@ -64,9 +64,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: concat_vectors_v4s16_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<8 x s16>) = G_CONCAT_VECTORS %0, %1
@@ -81,9 +81,9 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v4s32_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3,
     %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<8 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -98,11 +98,11 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v2s32_v2s32_v2s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>), [[COPY2]](<2 x s32>), [[COPY3]](<2 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>), [[COPY2]](<2 x s32>), [[COPY3]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -119,9 +119,9 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v2s64_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
@@ -136,9 +136,9 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v2p1_v2p1
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
     %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x p1>) = G_CONCAT_VECTORS %0, %1
@@ -153,9 +153,9 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v2p0_v2p0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x p0>), [[COPY1]](<2 x p0>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x p0>), [[COPY1]](<2 x p0>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>)
     %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x p0>) = G_CONCAT_VECTORS %0, %1
@@ -170,9 +170,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: concat_vectors_v2p3_v2p3
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>)
     %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     %2:_(<4 x p3>) = G_CONCAT_VECTORS %0, %1
@@ -187,9 +187,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; CHECK-LABEL: name: concat_vectors_v2p5_v2p5
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[COPY]](<2 x p5>), [[COPY1]](<2 x p5>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[COPY]](<2 x p5>), [[COPY1]](<2 x p5>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>)
     %0:_(<2 x p5>) = COPY $vgpr0_vgpr1
     %1:_(<2 x p5>) = COPY $vgpr2_vgpr3
     %2:_(<4 x p5>) = G_CONCAT_VECTORS %0, %1
@@ -204,9 +204,9 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: concat_vectors_v2p999_v2p999
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p999>) = G_CONCAT_VECTORS [[COPY]](<2 x p999>), [[COPY1]](<2 x p999>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p999>) = G_CONCAT_VECTORS [[COPY]](<2 x p999>), [[COPY1]](<2 x p999>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>)
     %0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1
@@ -221,34 +221,34 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2
     ; CHECK-LABEL: name: concat_vectors_v6s16_v3s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
     %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
index e60b213d6e09e..fa34420b5ae92 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
@@ -8,7 +8,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s32) = G_CONSTANT i32 5
     $vgpr0 = COPY %0
 ...
@@ -19,7 +19,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64)
     %0:_(s64) = G_CONSTANT i64 5
     $vgpr0_vgpr1 = COPY %0
 
@@ -32,10 +32,10 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s96
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4780896129847249538
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[C1]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = G_CONSTANT i96  -10105770365747857631829412482
     $vgpr0_vgpr1_vgpr2 = COPY %0
 
@@ -48,7 +48,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s1
     ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
-    ; CHECK: S_ENDPGM 0, implicit [[C]](s1)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[C]](s1)
     %1:_(s1) = G_CONSTANT i1 0
     S_ENDPGM 0, implicit %1
 ...
@@ -60,7 +60,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s7
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s7) = G_CONSTANT i7 5
     %1:_(s32) = G_ANYEXT %0
     $vgpr0 = COPY %1
@@ -73,7 +73,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s8) = G_CONSTANT i8 5
     %1:_(s32) = G_ANYEXT %0
     $vgpr0 = COPY %1
@@ -86,7 +86,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s16
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s16) = G_CONSTANT i16 5
     %1:_(s32) = G_ANYEXT %0
     $vgpr0 = COPY %1
@@ -99,9 +99,9 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_s128
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
     %0:_(s128) = G_CONSTANT i128 5
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0
 ...
@@ -113,7 +113,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p0
     ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](p0)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p0)
     %0:_(p0) = G_CONSTANT i64 0
     $vgpr0_vgpr1 = COPY %0
 ...
@@ -125,7 +125,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p1
     ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](p1)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p1)
     %0:_(p1) = G_CONSTANT i64 0
     $vgpr0_vgpr1 = COPY %0
 ...
@@ -137,7 +137,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p2
     ; CHECK: [[C:%[0-9]+]]:_(p2) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](p2)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](p2)
     %0:_(p2) = G_CONSTANT i32 0
     $vgpr0 = COPY %0
 ...
@@ -149,7 +149,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p3
     ; CHECK: [[C:%[0-9]+]]:_(p2) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](p2)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](p2)
     %0:_(p2) = G_CONSTANT i32 0
     $vgpr0 = COPY %0
 ...
@@ -161,7 +161,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p4
     ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](p4)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p4)
     %0:_(p4) = G_CONSTANT i64 0
     $vgpr0_vgpr1 = COPY %0
 ...
@@ -173,7 +173,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p5
     ; CHECK: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](p5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](p5)
     %0:_(p5) = G_CONSTANT i32 0
     $vgpr0 = COPY %0
 ...
@@ -185,7 +185,7 @@ body: |
 
     ; CHECK-LABEL: name: test_constant_p999
     ; CHECK: [[C:%[0-9]+]]:_(p999) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](p999)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](p999)
     %0:_(p999) = G_CONSTANT i64 0
     $vgpr0_vgpr1 = COPY %0
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
index 58b36109266e0..e8e863a964c93 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s64_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -23,8 +23,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s64_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_EXTRACT %0, 32
      $vgpr0 = COPY %1
@@ -38,8 +38,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s8_s15_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_TRUNC %0
     %2:_(s8) = G_EXTRACT %1, 0
@@ -55,8 +55,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s16_s31_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_TRUNC %0
     %2:_(s16) = G_EXTRACT %1, 0
@@ -72,8 +72,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s48_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_TRUNC %0
     %2:_(s32) = G_EXTRACT %1, 0
@@ -88,8 +88,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s96_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -102,8 +102,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s96_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -116,8 +116,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s96_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 64
     $vgpr0 = COPY %1
@@ -130,8 +130,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s128_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -144,8 +144,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s128_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -158,8 +158,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s128_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 64
     $vgpr0 = COPY %1
@@ -172,8 +172,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_s128_offset96
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 96
     $vgpr0 = COPY %1
@@ -187,8 +187,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v2s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -201,8 +201,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v2s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_EXTRACT %0, 32
      $vgpr0 = COPY %1
@@ -215,8 +215,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v3s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -229,8 +229,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v3s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -243,8 +243,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v3s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_EXTRACT %0, 64
     $vgpr0 = COPY %1
@@ -257,8 +257,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v4s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -271,8 +271,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v4s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -285,8 +285,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v4s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 64
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 64
     $vgpr0 = COPY %1
@@ -299,8 +299,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s32_v4s32_offset96
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s128), 96
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_EXTRACT %0, 96
     $vgpr0 = COPY %1
@@ -313,8 +313,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = G_EXTRACT %0, 0
     $vgpr0_vgpr1 = COPY %1
@@ -328,8 +328,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = G_EXTRACT %0, 32
     $vgpr0_vgpr1 = COPY %1
@@ -343,8 +343,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = G_EXTRACT %0, 64
     $vgpr0_vgpr1 = COPY %1
@@ -357,8 +357,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s64_v4s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = G_EXTRACT %0, 0
     $vgpr0_vgpr1 = COPY %1
@@ -372,8 +372,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s64_v4s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = G_EXTRACT %0, 32
     $vgpr0_vgpr1 = COPY %1
@@ -387,8 +387,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_s64_v4s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = G_EXTRACT %0, 64
     $vgpr0_vgpr1 = COPY %1
@@ -401,8 +401,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_p0_v4s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = G_EXTRACT %0, 0
     $vgpr0_vgpr1 = COPY %1
@@ -416,8 +416,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_p0_v4s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = G_EXTRACT %0, 32
     $vgpr0_vgpr1 = COPY %1
@@ -431,8 +431,8 @@ body: |
 
     ; CHECK-LABEL: name: test_extract_p0_v4s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64
-    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = G_EXTRACT %0, 64
     $vgpr0_vgpr1 = COPY %1
@@ -444,13 +444,13 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset0
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -463,13 +463,13 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset8
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -482,13 +482,13 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset16
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -501,13 +501,13 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset24
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 24
     %2:_(s32) = G_ANYEXT %1
@@ -521,27 +521,27 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v3s8_offset16
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<3 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -554,22 +554,22 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v5s1_offset4
     ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>)
-    ; CHECK: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
-    ; CHECK: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>)
+    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<5 x s1>) = G_IMPLICIT_DEF
     %1:_(s1) = G_EXTRACT %0, 4
     %2:_(s32) = G_ANYEXT %1
@@ -582,8 +582,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
     %0:_(<4 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -595,8 +595,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
     ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
     %0:_(<6 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -609,7 +609,7 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s8) = G_EXTRACT %1, 0
@@ -624,11 +624,11 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s16_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s8) = G_EXTRACT %1, 1
@@ -643,11 +643,11 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s16_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s8) = G_EXTRACT %1, 8
@@ -662,7 +662,7 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -676,9 +676,9 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s32_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 1
     %2:_(s32) = G_ANYEXT %1
@@ -692,9 +692,9 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s32_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -708,9 +708,9 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -724,9 +724,9 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_s32_offset24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -740,8 +740,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_p3_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
-    ; CHECK: $vgpr0 = COPY [[PTRTOINT]](s32)
+    ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -755,10 +755,10 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s8_p3_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -772,7 +772,7 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s1_s8_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s1) = G_EXTRACT %1, 0
@@ -787,11 +787,11 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_extract_s1_s8_offset2
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s1) = G_EXTRACT %1, 2
@@ -806,10 +806,10 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_extract_s8_s64_offset2
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_EXTRACT %0, 2
     %2:_(s32) = G_ANYEXT %1
@@ -823,13 +823,13 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_v3s16_offset0
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(s16) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -843,10 +843,10 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_extract_s8_s64_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -860,9 +860,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_extract_s16_s64_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 16
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 16
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -876,9 +876,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_extract_s16_s64_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 32
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 32
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_EXTRACT %0, 32
     %2:_(s32) = G_ANYEXT %1
@@ -892,9 +892,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_extract_s16_s64_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 48
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](s64), 48
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_EXTRACT %0, 48
     %2:_(s32) = G_ANYEXT %1
@@ -908,12 +908,12 @@ body: |
 
     ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -926,12 +926,12 @@ body: |
 
     ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
     ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0
-    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0
+    ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -945,8 +945,8 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -961,10 +961,10 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_v2s16_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 1
     %2:_(s32) = G_ANYEXT %1
@@ -979,10 +979,10 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_v2s16_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -997,10 +997,10 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_v2s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -1015,7 +1015,7 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -1030,9 +1030,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_s32_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 1
     %2:_(s32) = G_ANYEXT %1
@@ -1047,9 +1047,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_s32_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -1064,9 +1064,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -1081,9 +1081,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_p3_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -1098,9 +1098,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_s16_p3_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 1
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 1
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_EXTRACT %0, 1
     %2:_(s32) = G_ANYEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
index cf115be6ea987..ddf5ec6db384b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -9,10 +9,10 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[TRUNC]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s1)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[TRUNC]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s1)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s1) = G_FREEZE %1
@@ -27,8 +27,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s7
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s7) = G_TRUNC %0
     %2:_(s7) = G_FREEZE %1
@@ -43,8 +43,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s8) = G_FREEZE %1
@@ -59,10 +59,10 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s16) = G_FREEZE %1
@@ -77,8 +77,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = G_FREEZE %0
     $vgpr0 = COPY %1
@@ -91,8 +91,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_TRUNC %0
     %2:_(s48) = G_FREEZE %1
@@ -107,8 +107,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -121,14 +121,14 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s65
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[MV2]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[FREEZE]](s128)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[MV2]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[FREEZE]](s128)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s65) = G_TRUNC %0
     %2:_(s65) = G_FREEZE %1
@@ -143,8 +143,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s128) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -157,8 +157,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_256
     ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s256) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s256) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256)
     %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(s256) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -171,12 +171,12 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s448
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s448) = G_TRUNC %0
     %2:_(s448) = G_FREEZE %1
@@ -191,8 +191,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s512) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -205,11 +205,11 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](s1024)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1024)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s1024) = G_ANYEXT %0
     %2:_(s1024) = G_FREEZE %1
@@ -223,26 +223,26 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s1056
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
-    ; CHECK: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
-    ; CHECK: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792)
-    ; CHECK: S_NOP 0, implicit [[TRUNC]](s1056)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
+    ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1056)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s1056) = G_ANYEXT %0
     %2:_(s1056) = G_FREEZE %1
@@ -256,14 +256,14 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_s2048
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
-    ; CHECK: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
-    ; CHECK: [[MV2:%[0-9]+]]:_(s2048) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024)
-    ; CHECK: S_NOP 0, implicit [[MV2]](s2048)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
+    ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s2048) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024)
+    ; CHECK-NEXT: S_NOP 0, implicit [[MV2]](s2048)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s2048) = G_ANYEXT %0
     %2:_(s2048) = G_FREEZE %1
@@ -277,8 +277,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -291,8 +291,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -305,8 +305,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -319,8 +319,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v5s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<5 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<5 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>)
     %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     %1:_(<5 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1
@@ -333,8 +333,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v6s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<6 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<6 x s32>)
     %0:_(<6 x s32>) = G_IMPLICIT_DEF
     %1:_(<6 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -347,8 +347,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v7s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<7 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<7 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<7 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<7 x s32>)
     %0:_(<7 x s32>) = G_IMPLICIT_DEF
     %1:_(<7 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -361,8 +361,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v8s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<8 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>)
     %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<8 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -375,8 +375,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v16s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>)
     %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(<16 x s32>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -389,8 +389,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v17s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<16 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<16 x s32>)
     %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(<16 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -403,8 +403,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v32s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<32 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<32 x s32>)
     %0:_(<32 x s32>) = G_IMPLICIT_DEF
     %1:_(<32 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -417,19 +417,19 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v33s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]]
-    ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
-    ; CHECK: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>)
-    ; CHECK: S_NOP 0, implicit [[UV48]](<33 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>)
+    ; CHECK-NEXT: S_NOP 0, implicit [[UV48]](<33 x s32>)
     %0:_(<33 x s32>) = G_IMPLICIT_DEF
     %1:_(<33 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -442,12 +442,12 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v64s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>)
-    ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>)
+    ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>)
     %0:_(<64 x s32>) = G_IMPLICIT_DEF
     %1:_(<64 x s32>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -461,16 +461,16 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s1
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s1>) = G_ICMP intpred(ne), %0, %1
@@ -487,18 +487,18 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v3s1
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
-    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+    ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1
@@ -514,8 +514,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_TRUNC %0
     %2:_(<2 x s8>) = G_FREEZE %1
@@ -530,16 +530,16 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v3s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s8>) = G_TRUNC %0
     %2:_(<3 x s8>) = G_FREEZE %1
@@ -554,8 +554,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](<2 x s16>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = G_FREEZE %0
     $vgpr0 = COPY %1
@@ -568,25 +568,25 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v3s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]]
-    ; CHECK: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]]
+    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>) = G_TRUNC %0
     %2:_(<3 x s16>) = G_FREEZE %1
@@ -601,8 +601,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -615,29 +615,29 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v5s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; CHECK: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]]
-    ; CHECK: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]]
+    ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>)
     %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     %1:_(<5 x s16>) = G_TRUNC %0
     %2:_(<5 x s16>) = G_FREEZE %1
@@ -652,8 +652,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v6s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x s16>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -666,8 +666,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v8s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<8 x s16>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x s16>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s16>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<8 x s16>)
     %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<8 x s16>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -680,8 +680,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = G_FREEZE %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -695,13 +695,13 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v4s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -727,8 +727,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p0
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p0) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p0)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p0) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p0)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p0) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -741,8 +741,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p1
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p1) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p1)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p1) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -755,8 +755,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p2
     ; CHECK: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p2) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](p2)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p2) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p2)
     %0:_(p2) = COPY $vgpr0
     %1:_(p2) = G_FREEZE %0
     $vgpr0 = COPY %1
@@ -769,8 +769,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p3
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p3) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](p3)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p3) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_FREEZE %0
     $vgpr0 = COPY %1
@@ -783,8 +783,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p4
     ; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p4) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p4)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p4) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p4)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -797,8 +797,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p5
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p5) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0 = COPY [[FREEZE]](p5)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p5) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FREEZE]](p5)
     %0:_(p5) = COPY $vgpr0
     %1:_(p5) = G_FREEZE %0
     $vgpr0 = COPY %1
@@ -811,8 +811,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_p999
     ; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(p999) = G_FREEZE [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p999)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(p999) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FREEZE]](p999)
     %0:_(p999) = COPY $vgpr0_vgpr1
     %1:_(p999) = G_FREEZE %0
     $vgpr0_vgpr1 = COPY %1
@@ -826,8 +826,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v2s1024
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s1024>) = G_FREEZE [[DEF]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<2 x s1024>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s1024>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<2 x s1024>)
     %0:_(<2 x s1024>) = G_IMPLICIT_DEF
     %1:_(<2 x s1024>) = G_FREEZE %0
     S_NOP 0, implicit %1
@@ -841,8 +841,8 @@ body: |
 
     ; CHECK-LABEL: name: test_freeze_v3s1024
     ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF
-    ; CHECK: [[FREEZE:%[0-9]+]]:_(<3 x s1024>) = G_FREEZE [[DEF]]
-    ; CHECK: S_NOP 0, implicit [[FREEZE]](<3 x s1024>)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<3 x s1024>) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](<3 x s1024>)
     %0:_(<3 x s1024>) = G_IMPLICIT_DEF
     %1:_(<3 x s1024>) = G_FREEZE %0
     S_NOP 0, implicit %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 800be96c92516..f5b89b7e9de9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -9,9 +9,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 0
@@ -27,9 +27,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2
     ; CHECK-LABEL: name: insert_vector_elt_1_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 1
@@ -45,9 +45,9 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2
     ; CHECK-LABEL: name: insert_vector_elt_2_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 2
@@ -64,11 +64,11 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s64) = COPY $vgpr3_vgpr4
@@ -85,11 +85,11 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64
     ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>)
     %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s32) = COPY $vgpr16
     %2:_(s64) = COPY $vgpr17_vgpr18
@@ -106,9 +106,9 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_0_v16s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<16 x s64>) = G_INSERT [[DEF]], [[COPY]](s64), 0
-    ; CHECK: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<16 x s64>) = G_INSERT [[DEF]], [[COPY]](s64), 0
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(<16 x s64>) = G_IMPLICIT_DEF
     %2:_(s32) = G_CONSTANT i32 0
@@ -125,9 +125,9 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_CONSTANT i8 0
@@ -144,9 +144,9 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(<2 x s8>) = G_IMPLICIT_DEF
@@ -165,9 +165,9 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s64) = G_CONSTANT i64 0
@@ -185,93 +185,93 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
-    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
-    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
-    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
-    ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
-    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
-    ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
-    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
-    ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
-    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
-    ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
-    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
-    ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
-    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
-    ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
-    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
-    ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
-    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
-    ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
-    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
-    ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
-    ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
-    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64)
-    ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
-    ; CHECK: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64)
-    ; CHECK: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
-    ; CHECK: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
-    ; CHECK: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
-    ; CHECK: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
-    ; CHECK: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
-    ; CHECK: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
-    ; CHECK: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
-    ; CHECK: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
-    ; CHECK: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
-    ; CHECK: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
-    ; CHECK: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
-    ; CHECK: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
+    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
+    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+    ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
+    ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
+    ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
+    ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
+    ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
+    ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
+    ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
+    ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
+    ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64)
+    ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+    ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
+    ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
+    ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
+    ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
+    ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
+    ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
+    ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
+    ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
+    ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
+    ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_CONSTANT i32 64
     %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
@@ -294,66 +294,66 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_33_v64s32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
-    ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>)
-    ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
-    ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
-    ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
-    ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
-    ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
-    ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
-    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
-    ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
-    ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
-    ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
-    ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
-    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
-    ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
-    ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
-    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
-    ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
-    ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
-    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
-    ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
-    ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
-    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
-    ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
-    ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
-    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
-    ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
-    ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
-    ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
-    ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
-    ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
-    ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64)
-    ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>)
+    ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+    ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
+    ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
+    ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
+    ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
+    ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
+    ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
+    ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
+    ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
+    ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
+    ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
+    ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
+    ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
+    ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
+    ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
+    ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
+    ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
+    ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
+    ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64)
+    ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_CONSTANT i32 33
     %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
@@ -372,405 +372,405 @@ body: |
 
     ; CHECK-LABEL: name: insert_vector_elt_varidx_v64s32
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
-    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
-    ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
-    ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>)
-    ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>)
-    ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5)
-    ; CHECK: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5)
-    ; CHECK: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32)
-    ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5)
-    ; CHECK: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5)
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5)
-    ; CHECK: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5)
-    ; CHECK: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5)
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5)
-    ; CHECK: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32)
-    ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5)
-    ; CHECK: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5)
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32)
-    ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5)
-    ; CHECK: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5)
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5)
-    ; CHECK: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5)
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32)
-    ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5)
-    ; CHECK: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32)
-    ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5)
-    ; CHECK: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5)
-    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32)
-    ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5)
-    ; CHECK: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5)
-    ; CHECK: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5)
-    ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32)
-    ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5)
-    ; CHECK: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32)
-    ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5)
-    ; CHECK: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5)
-    ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
-    ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32)
-    ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5)
-    ; CHECK: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5)
-    ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68
-    ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5)
-    ; CHECK: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5)
-    ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72
-    ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32)
-    ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5)
-    ; CHECK: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
-    ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32)
-    ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5)
-    ; CHECK: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5)
-    ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80
-    ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32)
-    ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5)
-    ; CHECK: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84
-    ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32)
-    ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5)
-    ; CHECK: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5)
-    ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88
-    ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32)
-    ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5)
-    ; CHECK: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92
-    ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32)
-    ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5)
-    ; CHECK: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5)
-    ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96
-    ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32)
-    ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5)
-    ; CHECK: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5)
-    ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
-    ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32)
-    ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5)
-    ; CHECK: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5)
-    ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
-    ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32)
-    ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5)
-    ; CHECK: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108
-    ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32)
-    ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5)
-    ; CHECK: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5)
-    ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112
-    ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32)
-    ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5)
-    ; CHECK: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116
-    ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32)
-    ; CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5)
-    ; CHECK: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5)
-    ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120
-    ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32)
-    ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5)
-    ; CHECK: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124
-    ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32)
-    ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5)
-    ; CHECK: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5)
-    ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
-    ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32)
-    ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5)
-    ; CHECK: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5)
-    ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132
-    ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32)
-    ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5)
-    ; CHECK: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5)
-    ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
-    ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32)
-    ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5)
-    ; CHECK: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140
-    ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32)
-    ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5)
-    ; CHECK: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5)
-    ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
-    ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32)
-    ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5)
-    ; CHECK: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148
-    ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32)
-    ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5)
-    ; CHECK: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5)
-    ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152
-    ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32)
-    ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5)
-    ; CHECK: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156
-    ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32)
-    ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5)
-    ; CHECK: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5)
-    ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160
-    ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32)
-    ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5)
-    ; CHECK: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5)
-    ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
-    ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32)
-    ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5)
-    ; CHECK: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5)
-    ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168
-    ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32)
-    ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5)
-    ; CHECK: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172
-    ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32)
-    ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5)
-    ; CHECK: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5)
-    ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176
-    ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32)
-    ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5)
-    ; CHECK: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180
-    ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32)
-    ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5)
-    ; CHECK: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5)
-    ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184
-    ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32)
-    ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5)
-    ; CHECK: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188
-    ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32)
-    ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5)
-    ; CHECK: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5)
-    ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192
-    ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32)
-    ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5)
-    ; CHECK: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5)
-    ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196
-    ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32)
-    ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5)
-    ; CHECK: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5)
-    ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200
-    ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32)
-    ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5)
-    ; CHECK: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204
-    ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32)
-    ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5)
-    ; CHECK: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5)
-    ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208
-    ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32)
-    ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5)
-    ; CHECK: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212
-    ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32)
-    ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5)
-    ; CHECK: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5)
-    ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216
-    ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32)
-    ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5)
-    ; CHECK: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220
-    ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32)
-    ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5)
-    ; CHECK: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5)
-    ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224
-    ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32)
-    ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5)
-    ; CHECK: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5)
-    ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228
-    ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32)
-    ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5)
-    ; CHECK: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5)
-    ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
-    ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32)
-    ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5)
-    ; CHECK: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236
-    ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32)
-    ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5)
-    ; CHECK: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5)
-    ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240
-    ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32)
-    ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5)
-    ; CHECK: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5)
-    ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244
-    ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32)
-    ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5)
-    ; CHECK: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5)
-    ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248
-    ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32)
-    ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5)
-    ; CHECK: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5)
-    ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
-    ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32)
-    ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5)
-    ; CHECK: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5)
-    ; CHECK: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]]
-    ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]]
-    ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32)
-    ; CHECK: G_STORE [[C3]](s32), [[PTR_ADD66]](p5) :: (store (s32), addrspace 5)
-    ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32), align 256, addrspace 5)
-    ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
-    ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
-    ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
-    ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
-    ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
-    ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
-    ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
-    ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
-    ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s32) from unknown-address + 64, align 64, addrspace 5)
-    ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s32) from unknown-address + 68, addrspace 5)
-    ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s32) from unknown-address + 72, align 8, addrspace 5)
-    ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s32) from unknown-address + 76, addrspace 5)
-    ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s32) from unknown-address + 80, align 16, addrspace 5)
-    ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load (s32) from unknown-address + 84, addrspace 5)
-    ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load (s32) from unknown-address + 88, align 8, addrspace 5)
-    ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load (s32) from unknown-address + 92, addrspace 5)
-    ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load (s32) from unknown-address + 96, align 32, addrspace 5)
-    ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load (s32) from unknown-address + 100, addrspace 5)
-    ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load (s32) from unknown-address + 104, align 8, addrspace 5)
-    ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load (s32) from unknown-address + 108, addrspace 5)
-    ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load (s32) from unknown-address + 112, align 16, addrspace 5)
-    ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load (s32) from unknown-address + 116, addrspace 5)
-    ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load (s32) from unknown-address + 120, align 8, addrspace 5)
-    ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load (s32) from unknown-address + 124, addrspace 5)
-    ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load (s32) from unknown-address + 128, align 128, addrspace 5)
-    ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load (s32) from unknown-address + 132, addrspace 5)
-    ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load (s32) from unknown-address + 136, align 8, addrspace 5)
-    ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load (s32) from unknown-address + 140, addrspace 5)
-    ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load (s32) from unknown-address + 144, align 16, addrspace 5)
-    ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load (s32) from unknown-address + 148, addrspace 5)
-    ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load (s32) from unknown-address + 152, align 8, addrspace 5)
-    ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load (s32) from unknown-address + 156, addrspace 5)
-    ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load (s32) from unknown-address + 160, align 32, addrspace 5)
-    ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load (s32) from unknown-address + 164, addrspace 5)
-    ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load (s32) from unknown-address + 168, align 8, addrspace 5)
-    ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load (s32) from unknown-address + 172, addrspace 5)
-    ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load (s32) from unknown-address + 176, align 16, addrspace 5)
-    ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load (s32) from unknown-address + 180, addrspace 5)
-    ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load (s32) from unknown-address + 184, align 8, addrspace 5)
-    ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load (s32) from unknown-address + 188, addrspace 5)
-    ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load (s32) from unknown-address + 192, align 64, addrspace 5)
-    ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load (s32) from unknown-address + 196, addrspace 5)
-    ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load (s32) from unknown-address + 200, align 8, addrspace 5)
-    ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load (s32) from unknown-address + 204, addrspace 5)
-    ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load (s32) from unknown-address + 208, align 16, addrspace 5)
-    ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load (s32) from unknown-address + 212, addrspace 5)
-    ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load (s32) from unknown-address + 216, align 8, addrspace 5)
-    ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load (s32) from unknown-address + 220, addrspace 5)
-    ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load (s32) from unknown-address + 224, align 32, addrspace 5)
-    ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load (s32) from unknown-address + 228, addrspace 5)
-    ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load (s32) from unknown-address + 232, align 8, addrspace 5)
-    ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load (s32) from unknown-address + 236, addrspace 5)
-    ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load (s32) from unknown-address + 240, align 16, addrspace 5)
-    ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
-    ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
-    ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32)
-    ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32)
-    ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32)
-    ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32)
-    ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32)
-    ; CHECK: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32)
-    ; CHECK: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32)
-    ; CHECK: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32)
-    ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32)
-    ; CHECK: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32)
-    ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
-    ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
-    ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
-    ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
-    ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
-    ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
-    ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
-    ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
-    ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
-    ; CHECK: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
-    ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
-    ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
-    ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
-    ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
-    ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
-    ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
-    ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
-    ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
-    ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
-    ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
-    ; CHECK: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
-    ; CHECK: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
+    ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
+    ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>)
+    ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>)
+    ; CHECK-NEXT: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5)
+    ; CHECK-NEXT: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5)
+    ; CHECK-NEXT: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5)
+    ; CHECK-NEXT: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5)
+    ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32)
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5)
+    ; CHECK-NEXT: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32)
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5)
+    ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5)
+    ; CHECK-NEXT: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32)
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5)
+    ; CHECK-NEXT: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
+    ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32)
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5)
+    ; CHECK-NEXT: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32)
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5)
+    ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+    ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32)
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5)
+    ; CHECK-NEXT: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32)
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5)
+    ; CHECK-NEXT: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
+    ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32)
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5)
+    ; CHECK-NEXT: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32)
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5)
+    ; CHECK-NEXT: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
+    ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32)
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5)
+    ; CHECK-NEXT: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+    ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32)
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5)
+    ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68
+    ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32)
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5)
+    ; CHECK-NEXT: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72
+    ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32)
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5)
+    ; CHECK-NEXT: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
+    ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32)
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5)
+    ; CHECK-NEXT: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80
+    ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32)
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5)
+    ; CHECK-NEXT: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84
+    ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32)
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5)
+    ; CHECK-NEXT: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88
+    ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32)
+    ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5)
+    ; CHECK-NEXT: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92
+    ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32)
+    ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5)
+    ; CHECK-NEXT: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96
+    ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32)
+    ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5)
+    ; CHECK-NEXT: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
+    ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32)
+    ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5)
+    ; CHECK-NEXT: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
+    ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32)
+    ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5)
+    ; CHECK-NEXT: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108
+    ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32)
+    ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5)
+    ; CHECK-NEXT: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112
+    ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32)
+    ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5)
+    ; CHECK-NEXT: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116
+    ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32)
+    ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5)
+    ; CHECK-NEXT: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120
+    ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32)
+    ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5)
+    ; CHECK-NEXT: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124
+    ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32)
+    ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5)
+    ; CHECK-NEXT: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32)
+    ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5)
+    ; CHECK-NEXT: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132
+    ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32)
+    ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5)
+    ; CHECK-NEXT: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
+    ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32)
+    ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5)
+    ; CHECK-NEXT: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140
+    ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32)
+    ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5)
+    ; CHECK-NEXT: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
+    ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32)
+    ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5)
+    ; CHECK-NEXT: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148
+    ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32)
+    ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5)
+    ; CHECK-NEXT: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152
+    ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32)
+    ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5)
+    ; CHECK-NEXT: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156
+    ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32)
+    ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5)
+    ; CHECK-NEXT: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160
+    ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32)
+    ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5)
+    ; CHECK-NEXT: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
+    ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32)
+    ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5)
+    ; CHECK-NEXT: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168
+    ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32)
+    ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5)
+    ; CHECK-NEXT: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172
+    ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32)
+    ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5)
+    ; CHECK-NEXT: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176
+    ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32)
+    ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5)
+    ; CHECK-NEXT: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180
+    ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32)
+    ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5)
+    ; CHECK-NEXT: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184
+    ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32)
+    ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5)
+    ; CHECK-NEXT: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188
+    ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32)
+    ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5)
+    ; CHECK-NEXT: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192
+    ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32)
+    ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5)
+    ; CHECK-NEXT: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196
+    ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32)
+    ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5)
+    ; CHECK-NEXT: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200
+    ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32)
+    ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5)
+    ; CHECK-NEXT: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204
+    ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32)
+    ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5)
+    ; CHECK-NEXT: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208
+    ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32)
+    ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5)
+    ; CHECK-NEXT: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212
+    ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32)
+    ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5)
+    ; CHECK-NEXT: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216
+    ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32)
+    ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5)
+    ; CHECK-NEXT: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220
+    ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32)
+    ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5)
+    ; CHECK-NEXT: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224
+    ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32)
+    ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5)
+    ; CHECK-NEXT: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228
+    ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32)
+    ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5)
+    ; CHECK-NEXT: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
+    ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32)
+    ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5)
+    ; CHECK-NEXT: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236
+    ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32)
+    ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5)
+    ; CHECK-NEXT: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240
+    ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32)
+    ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5)
+    ; CHECK-NEXT: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244
+    ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32)
+    ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5)
+    ; CHECK-NEXT: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248
+    ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32)
+    ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5)
+    ; CHECK-NEXT: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
+    ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32)
+    ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5)
+    ; CHECK-NEXT: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5)
+    ; CHECK-NEXT: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]]
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]]
+    ; CHECK-NEXT: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32)
+    ; CHECK-NEXT: G_STORE [[C3]](s32), [[PTR_ADD66]](p5) :: (store (s32), addrspace 5)
+    ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32), align 256, addrspace 5)
+    ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
+    ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
+    ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
+    ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
+    ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
+    ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s32) from unknown-address + 64, align 64, addrspace 5)
+    ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s32) from unknown-address + 68, addrspace 5)
+    ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s32) from unknown-address + 72, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s32) from unknown-address + 76, addrspace 5)
+    ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s32) from unknown-address + 80, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load (s32) from unknown-address + 84, addrspace 5)
+    ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load (s32) from unknown-address + 88, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load (s32) from unknown-address + 92, addrspace 5)
+    ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load (s32) from unknown-address + 96, align 32, addrspace 5)
+    ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load (s32) from unknown-address + 100, addrspace 5)
+    ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load (s32) from unknown-address + 104, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load (s32) from unknown-address + 108, addrspace 5)
+    ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load (s32) from unknown-address + 112, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load (s32) from unknown-address + 116, addrspace 5)
+    ; CHECK-NEXT: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load (s32) from unknown-address + 120, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load (s32) from unknown-address + 124, addrspace 5)
+    ; CHECK-NEXT: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load (s32) from unknown-address + 128, align 128, addrspace 5)
+    ; CHECK-NEXT: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load (s32) from unknown-address + 132, addrspace 5)
+    ; CHECK-NEXT: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load (s32) from unknown-address + 136, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load (s32) from unknown-address + 140, addrspace 5)
+    ; CHECK-NEXT: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load (s32) from unknown-address + 144, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load (s32) from unknown-address + 148, addrspace 5)
+    ; CHECK-NEXT: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load (s32) from unknown-address + 152, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load (s32) from unknown-address + 156, addrspace 5)
+    ; CHECK-NEXT: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load (s32) from unknown-address + 160, align 32, addrspace 5)
+    ; CHECK-NEXT: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load (s32) from unknown-address + 164, addrspace 5)
+    ; CHECK-NEXT: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load (s32) from unknown-address + 168, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load (s32) from unknown-address + 172, addrspace 5)
+    ; CHECK-NEXT: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load (s32) from unknown-address + 176, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load (s32) from unknown-address + 180, addrspace 5)
+    ; CHECK-NEXT: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load (s32) from unknown-address + 184, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load (s32) from unknown-address + 188, addrspace 5)
+    ; CHECK-NEXT: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load (s32) from unknown-address + 192, align 64, addrspace 5)
+    ; CHECK-NEXT: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load (s32) from unknown-address + 196, addrspace 5)
+    ; CHECK-NEXT: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load (s32) from unknown-address + 200, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load (s32) from unknown-address + 204, addrspace 5)
+    ; CHECK-NEXT: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load (s32) from unknown-address + 208, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load (s32) from unknown-address + 212, addrspace 5)
+    ; CHECK-NEXT: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load (s32) from unknown-address + 216, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load (s32) from unknown-address + 220, addrspace 5)
+    ; CHECK-NEXT: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load (s32) from unknown-address + 224, align 32, addrspace 5)
+    ; CHECK-NEXT: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load (s32) from unknown-address + 228, addrspace 5)
+    ; CHECK-NEXT: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load (s32) from unknown-address + 232, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load (s32) from unknown-address + 236, addrspace 5)
+    ; CHECK-NEXT: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load (s32) from unknown-address + 240, align 16, addrspace 5)
+    ; CHECK-NEXT: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
+    ; CHECK-NEXT: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
+    ; CHECK-NEXT: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
+    ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CHECK-NEXT: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
+    ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
+    ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
+    ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
+    ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
+    ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+    ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
+    ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
+    ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
+    ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = COPY $sgpr2
     %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
@@ -788,49 +788,49 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-LABEL: name: insert_vector_elt_varidx_v4s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[SHL3]](s32)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[SHL3]](s32)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL5]], [[C5]]
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[XOR]]
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C]](s32)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C1]](s32)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C2]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]]
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL6]]
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL7]]
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL8]]
-    ; CHECK: $vgpr0 = COPY [[OR6]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[SHL3]](s32)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[SHL3]](s32)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL5]], [[C5]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[XOR]]
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C1]](s32)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL6]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL7]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL8]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[OR6]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -849,105 +849,105 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: insert_vector_elt_varidx_v8s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C5]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[SHL6]](s32)
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL6]](s32)
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL8]], [[C7]]
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]]
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR6]](s32), [[LSHR6]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>)
-    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
-    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32)
-    ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
-    ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32)
-    ; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C8]]
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C8]]
-    ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND12]], [[C1]](s16)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND11]], [[SHL9]]
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
-    ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C8]]
-    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
-    ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C8]]
-    ; CHECK: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND14]], [[C1]](s16)
-    ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND13]], [[SHL10]]
-    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C8]]
-    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32)
-    ; CHECK: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C8]]
-    ; CHECK: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND16]], [[C1]](s16)
-    ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[SHL11]]
-    ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
-    ; CHECK: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C8]]
-    ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32)
-    ; CHECK: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C8]]
-    ; CHECK: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND18]], [[C1]](s16)
-    ; CHECK: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND17]], [[SHL12]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
-    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
-    ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
-    ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR11]](s32), [[OR12]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C5]](s32)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[SHL6]](s32)
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL6]](s32)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL8]], [[C7]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]]
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR6]](s32), [[LSHR6]](s32)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>)
+    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32)
+    ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C8]]
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C8]]
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND12]], [[C1]](s16)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND11]], [[SHL9]]
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C8]]
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C8]]
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND14]], [[C1]](s16)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND13]], [[SHL10]]
+    ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C8]]
+    ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32)
+    ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C8]]
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND16]], [[C1]](s16)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[SHL11]]
+    ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
+    ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C8]]
+    ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32)
+    ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C8]]
+    ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND18]], [[C1]](s16)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND17]], [[SHL12]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
+    ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR11]](s32), [[OR12]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = COPY $vgpr3

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
index 563344f42ab68..1a8b37cf7d4ca 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
@@ -9,9 +9,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s64) = G_INSERT %0, %1, 0
@@ -25,9 +25,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s64) = G_INSERT %0, %1, 32
@@ -42,9 +42,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[COPY1]](s32), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s64) = G_INSERT %0, %1, 16
@@ -59,9 +59,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s96_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(s96) = G_INSERT %0, %1, 0
@@ -75,9 +75,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s96_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(s96) = G_INSERT %0, %1, 32
@@ -91,9 +91,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s96_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY]], [[COPY1]](s32), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(s96) = G_INSERT %0, %1, 64
@@ -107,9 +107,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s128) = G_INSERT %0, %1, 0
@@ -123,9 +123,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s128) = G_INSERT %0, %1, 32
@@ -139,9 +139,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s128) = G_INSERT %0, %1, 64
@@ -155,9 +155,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s32_offset96
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 96
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s32), 96
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s128) = G_INSERT %0, %1, 96
@@ -171,9 +171,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s64_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 0
@@ -187,9 +187,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s64_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 32
@@ -203,9 +203,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s64_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s64), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 64
@@ -219,9 +219,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s96_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(s128) = G_INSERT %0, %1, 0
@@ -235,9 +235,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s96_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](s96), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(s128) = G_INSERT %0, %1, 32
@@ -251,9 +251,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_p0_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](p0)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](p0)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(p0) = G_INSERT %0, %1, 0
@@ -267,9 +267,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_p0_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](p0)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(p0) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](p0)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(p0) = G_INSERT %0, %1, 32
@@ -283,9 +283,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_p0_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 0
@@ -299,9 +299,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_p0_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 32
@@ -315,9 +315,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_p0_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[COPY1]](p0), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(s128) = G_INSERT %0, %1, 64
@@ -332,10 +332,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s16) = G_TRUNC %1
@@ -351,10 +351,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s16) = G_TRUNC %1
@@ -370,10 +370,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s16) = G_TRUNC %1
@@ -389,10 +389,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s128_s16_offset112
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 112
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY]], [[TRUNC]](s16), 112
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s16) = G_TRUNC %1
@@ -408,9 +408,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v2s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<2 x s32>) = G_INSERT %0, %1, 0
@@ -424,9 +424,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v2s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<2 x s32>) = G_INSERT %0, %1, 32
@@ -440,9 +440,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 0
@@ -456,9 +456,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 32
@@ -472,9 +472,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s32_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = COPY $vgpr3
     %2:_(<3 x s32>) = G_INSERT %0, %1, 64
@@ -488,9 +488,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -504,9 +504,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -520,9 +520,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -536,9 +536,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s32_offset96
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(<4 x s32>) = G_INSERT %0, %1, 96
@@ -552,9 +552,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -568,9 +568,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -584,9 +584,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s64_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s64) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -600,9 +600,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s96_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -616,9 +616,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_s96_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -632,9 +632,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -648,9 +648,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -664,9 +664,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -680,9 +680,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -696,9 +696,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -712,9 +712,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_p0_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -728,9 +728,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_p0_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 32
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -744,9 +744,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s32_p0_offset64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](p0), 64
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(p0) = COPY $vgpr4_vgpr5
     %2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -761,15 +761,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v2s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -785,17 +785,17 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v2s16_s16_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -810,16 +810,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v2s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -834,16 +834,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -861,16 +861,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -888,16 +888,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -915,15 +915,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<2 x s16>) = COPY $vgpr2
@@ -940,15 +940,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<2 x s16>) = COPY $vgpr2
@@ -965,15 +965,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -990,15 +990,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v3s16_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(s32) = COPY $vgpr2
@@ -1015,10 +1015,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1033,10 +1033,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1051,10 +1051,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1069,10 +1069,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s16_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1087,9 +1087,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1103,9 +1103,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1119,9 +1119,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 32
@@ -1135,10 +1135,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1153,10 +1153,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1171,9 +1171,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1187,9 +1187,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1203,9 +1203,9 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_v4s16_s32_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(<4 x s16>) = G_INSERT %0, %1, 32
@@ -1220,10 +1220,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1238,10 +1238,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1256,10 +1256,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s16_offset32
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1274,10 +1274,10 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s64_s16_offset48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -1292,14 +1292,14 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s32_s16_offset0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -1315,16 +1315,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s32_s16_offset1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -1340,16 +1340,16 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s32_s16_offset8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1
@@ -1365,15 +1365,15 @@ body: |
 
     ; CHECK-LABEL: name: test_insert_s32_s16_offset16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
index 248b5b9e2d813..8e833ca7b5f7a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
@@ -9,33 +9,33 @@ body: |
 
     ; CI-LABEL: name: test_load_constant32bit_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s8), addrspace 6)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C2]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s8), addrspace 6)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p6) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 6)
     $vgpr0 = COPY %1
@@ -49,10 +49,10 @@ body: |
 
     ; CI-LABEL: name: test_load_constant32bit_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p6) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6)
     $vgpr0 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index 6f5ba5f77a683..b3b3bd295cfd8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -12,22 +12,22 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_constant_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_constant_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 4)
     %2:_(s32) = G_ZEXT %1
@@ -42,22 +42,22 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_constant_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_constant_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 4)
     %2:_(s32) = G_ZEXT %1
@@ -72,16 +72,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -96,16 +96,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -120,16 +120,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -144,16 +144,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -168,53 +168,53 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -229,16 +229,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -252,43 +252,43 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: $vgpr0 = COPY [[OR]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; VI-LABEL: name: test_load_constant_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: $vgpr0 = COPY [[OR]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-LABEL: name: test_load_constant_s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 4)
     $vgpr0 = COPY %1
@@ -302,85 +302,85 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 4)
     $vgpr0 = COPY %1
@@ -394,16 +394,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -418,16 +418,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -442,93 +442,93 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -543,96 +543,96 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -647,22 +647,22 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_constant_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-LABEL: name: test_load_constant_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 4)
     %2:_(s64) = G_ZEXT %1
@@ -677,16 +677,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_constant_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_constant_s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -700,16 +700,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_constant_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_constant_s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -723,76 +723,76 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_constant_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_constant_s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -806,191 +806,191 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_constant_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_constant_s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -1004,19 +1004,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1030,19 +1030,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1056,19 +1056,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1082,103 +1082,103 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1192,205 +1192,205 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1404,34 +1404,34 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s160_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; CI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_constant_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; VI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-LABEL: name: test_load_constant_s160_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; GFX9: S_NOP 0, implicit [[BITCAST]](s160)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 4)
     S_NOP 0, implicit %1
@@ -1445,61 +1445,61 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s224_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; CI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; CI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_constant_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-LABEL: name: test_load_constant_s224_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; GFX9: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
      %0:_(p4) = COPY $vgpr0_vgpr1
      %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -1516,19 +1516,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s128_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_constant_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_constant_s128_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1542,19 +1542,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s128_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_constant_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_constant_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1568,262 +1568,262 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s128_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_constant_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_constant_s128_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1837,19 +1837,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s256_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; VI-LABEL: name: test_load_constant_s256_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; GFX9-LABEL: name: test_load_constant_s256_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -1863,16 +1863,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_constant_p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_constant_p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -1886,16 +1886,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_constant_p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_constant_p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -1909,191 +1909,191 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_constant_p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_constant_p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -2107,16 +2107,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; VI-LABEL: name: test_load_constant_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-LABEL: name: test_load_constant_p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
      %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -2130,16 +2130,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p4_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_constant_p4_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-LABEL: name: test_load_constant_p4_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -2153,16 +2153,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_constant_p4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-LABEL: name: test_load_constant_p4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -2176,76 +2176,76 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p4_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_constant_p4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-LABEL: name: test_load_constant_p4_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -2259,191 +2259,191 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p4_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_constant_p4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-LABEL: name: test_load_constant_p4_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -2457,16 +2457,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p5_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; VI-LABEL: name: test_load_constant_p5_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-LABEL: name: test_load_constant_p5_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -2480,46 +2480,46 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p5_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_constant_p5_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_constant_p5_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 4)
     $vgpr0 = COPY %1
@@ -2533,88 +2533,88 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_p5_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_constant_p5_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_constant_p5_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 4)
     $vgpr0 = COPY %1
@@ -2628,50 +2628,50 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 4)
     %2:_(s16) = G_BITCAST %1
@@ -2687,50 +2687,50 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 4)
     %2:_(s16) = G_BITCAST %1
@@ -2746,53 +2746,53 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 4)
     %2:_(s16) = G_BITCAST %1
@@ -2808,87 +2808,87 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 4)
     %2:_(s24) = G_BITCAST %1
@@ -2904,96 +2904,96 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 4)
     %2:_(s24) = G_BITCAST %1
@@ -3009,67 +3009,67 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 4)
     %2:_(s32) = G_BITCAST %1
@@ -3084,73 +3084,73 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 4)
     %2:_(s32) = G_BITCAST %1
@@ -3165,85 +3165,85 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 4)
     %2:_(s32) = G_BITCAST %1
@@ -3258,112 +3258,112 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 4)
     %2:_(<2 x s32>) = G_BITCAST %1
@@ -3378,99 +3378,99 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
     ; VI-LABEL: name: test_load_constant_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
     ; GFX9-LABEL: name: test_load_constant_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[LSHR5]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[LSHR6]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR10]](s32), [[LSHR11]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[LSHR5]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[LSHR6]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR9]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR10]](s32), [[LSHR11]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3484,167 +3484,167 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v32s8_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; CI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; CI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; CI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; CI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; CI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; CI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; CI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; CI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
+    ; CI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; CI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
+    ; CI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; CI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
+    ; CI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; CI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32)
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
+    ; CI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; CI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
     ; VI-LABEL: name: test_load_constant_v32s8_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32)
-    ; VI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32)
-    ; VI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32)
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32)
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
     ; GFX9-LABEL: name: test_load_constant_v32s8_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[LSHR5]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[LSHR6]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR10]](s32), [[LSHR11]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; GFX9: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[LSHR12]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>)
-    ; GFX9: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; GFX9: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR15]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR16]](s32), [[LSHR17]](s32)
-    ; GFX9: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>)
-    ; GFX9: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; GFX9: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[LSHR18]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
-    ; GFX9: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>)
-    ; GFX9: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; GFX9: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR21]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR22]](s32), [[LSHR23]](s32)
-    ; GFX9: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[LSHR5]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[LSHR6]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR9]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR10]](s32), [[LSHR11]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[LSHR12]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR15]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR16]](s32), [[LSHR17]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[LSHR18]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>)
+    ; GFX9-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR21]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR22]](s32), [[LSHR23]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -3659,16 +3659,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; VI-LABEL: name: test_load_constant_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v2s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -3682,40 +3682,40 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_constant_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v2s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 4)
     $vgpr0 = COPY %1
@@ -3729,99 +3729,99 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_constant_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v2s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 4)
     $vgpr0 = COPY %1
@@ -3835,84 +3835,84 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -3928,90 +3928,90 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4027,90 +4027,90 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 4)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4126,177 +4126,177 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 4)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4312,16 +4312,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v4s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4335,16 +4335,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v4s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4358,72 +4358,72 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v4s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4437,180 +4437,180 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v4s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4624,19 +4624,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v8s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; VI-LABEL: name: test_load_constant_v8s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v8s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4650,16 +4650,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v2s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4673,16 +4673,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v2s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4696,73 +4696,73 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v2s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v2s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4776,145 +4776,145 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v2s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -4928,16 +4928,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_constant_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -4953,16 +4953,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_constant_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -4976,16 +4976,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_constant_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v4s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4999,16 +4999,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_constant_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v4s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5022,16 +5022,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_constant_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5045,16 +5045,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v8s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; VI-LABEL: name: test_load_constant_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v8s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -5068,16 +5068,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v16s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; VI-LABEL: name: test_load_constant_v16s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v16s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -5091,16 +5091,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s16>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -5114,16 +5114,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_constant_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v2s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5137,16 +5137,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_constant_v2s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v2s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5160,16 +5160,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_constant_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5183,133 +5183,133 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_constant_v2s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v2s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5323,348 +5323,348 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_constant_v2s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v2s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5678,25 +5678,25 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5712,37 +5712,37 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
-    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5758,508 +5758,508 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; CI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -6275,16 +6275,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v4s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v4s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6298,16 +6298,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v4s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v4s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6321,656 +6321,656 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; CI: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C13]](s64)
-    ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
-    ; CI: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
-    ; CI: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
-    ; CI: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
-    ; CI: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
-    ; CI: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
-    ; CI: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
-    ; CI: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
-    ; CI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; CI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
-    ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
-    ; CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
-    ; CI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
-    ; CI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; CI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
-    ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
-    ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
-    ; CI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
-    ; CI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; CI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
-    ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
-    ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
-    ; CI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
-    ; CI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; CI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
-    ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
-    ; CI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
-    ; CI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
-    ; CI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; CI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; CI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
-    ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; CI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; CI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
-    ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; CI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C13]](s64)
+    ; CI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; CI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
+    ; CI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; CI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; CI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
+    ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
+    ; CI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
+    ; CI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
+    ; CI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; CI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; CI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
+    ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
+    ; CI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
+    ; CI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
+    ; CI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; CI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; CI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
+    ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
+    ; CI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
+    ; CI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
+    ; CI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; CI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; CI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
+    ; CI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
+    ; CI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
+    ; CI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
+    ; CI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; CI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; CI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
+    ; CI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; CI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; CI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; CI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
+    ; CI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v4s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
-    ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; VI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; VI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; VI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
+    ; VI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; VI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; VI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; VI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; VI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; VI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; VI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; VI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; VI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; VI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; VI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; VI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; VI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; VI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; VI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; VI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; VI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; VI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; VI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; VI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; VI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; VI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; VI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; VI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v4s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
-    ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; GFX9: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
-    ; GFX9: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
-    ; GFX9: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
-    ; GFX9: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
-    ; GFX9: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
-    ; GFX9: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
-    ; GFX9: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
-    ; GFX9: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
-    ; GFX9: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; GFX9: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; GFX9: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; GFX9: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; GFX9: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; GFX9: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; GFX9: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; GFX9: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; GFX9: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; GFX9: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; GFX9: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; GFX9: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; GFX9: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; GFX9: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; GFX9: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; GFX9: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; GFX9: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; GFX9: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; GFX9: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; GFX9: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; GFX9: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; GFX9-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4)
+    ; GFX9-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; GFX9-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; GFX9-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; GFX9-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; GFX9-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; GFX9-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; GFX9-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; GFX9-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; GFX9-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; GFX9-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; GFX9-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; GFX9-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; GFX9-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; GFX9-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6984,19 +6984,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2s128_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; VI-LABEL: name: test_load_constant_v2s128_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; GFX9-LABEL: name: test_load_constant_v2s128_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -7010,19 +7010,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p1_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_constant_v2p1_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_constant_v2p1_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7036,19 +7036,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_constant_v2p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_constant_v2p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7062,19 +7062,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_constant_v2p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_constant_v2p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7088,262 +7088,262 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_constant_v2p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_constant_v2p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7357,16 +7357,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_constant_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_constant_v2p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7380,16 +7380,16 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_constant_v2p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_constant_v2p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7403,151 +7403,151 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v2p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; VI-LABEL: name: test_load_constant_v2p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_constant_v2p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7561,16 +7561,16 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s32_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_constant_s32_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_constant_s32_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -7584,16 +7584,16 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s32_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_constant_s32_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_constant_s32_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 4)
     $vgpr0 = COPY %1
@@ -7608,19 +7608,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7634,19 +7634,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7660,19 +7660,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7686,28 +7686,28 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7721,19 +7721,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7747,19 +7747,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -7773,145 +7773,145 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -7925,73 +7925,73 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -8005,16 +8005,16 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -8028,16 +8028,16 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v3s32_from_6_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_extload_constant_v3s32_from_6_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_extload_constant_v3s32_from_6_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -8051,16 +8051,16 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v4s32_from_8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_extload_constant_v4s32_from_8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_extload_constant_v4s32_from_8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8074,385 +8074,385 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -8469,193 +8469,193 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -8672,40 +8672,40 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -8722,40 +8722,40 @@ body: |
 
     ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align16
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -8772,19 +8772,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s512_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
     ; VI-LABEL: name: test_load_constant_s512_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
     ; GFX9-LABEL: name: test_load_constant_s512_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s512) = G_LOAD %0 :: (load (s512), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -8798,19 +8798,19 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v4s128_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
     ; VI-LABEL: name: test_load_constant_v4s128_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
     ; GFX9-LABEL: name: test_load_constant_v4s128_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s128>) = G_LOAD %0 :: (load (<4 x s128>), align 32, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 95637b2df9060..5b4176425801a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -12,22 +12,22 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_flat_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_flat_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 0)
     %2:_(s32) = G_ZEXT %1
@@ -42,22 +42,22 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_flat_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_flat_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 0)
     %2:_(s32) = G_ZEXT %1
@@ -72,16 +72,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_flat_s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_flat_s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -96,16 +96,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_flat_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_flat_s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -120,16 +120,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_flat_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_flat_s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -144,16 +144,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_flat_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_flat_s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -168,53 +168,53 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -229,16 +229,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_flat_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_flat_s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -252,43 +252,43 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: $vgpr0 = COPY [[OR]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; VI-LABEL: name: test_load_flat_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: $vgpr0 = COPY [[OR]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-LABEL: name: test_load_flat_s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 0)
     $vgpr0 = COPY %1
@@ -302,85 +302,85 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 0)
     $vgpr0 = COPY %1
@@ -394,22 +394,22 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_flat_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-LABEL: name: test_load_flat_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 0)
     %2:_(s64) = G_ZEXT %1
@@ -424,16 +424,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_flat_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_flat_s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -447,16 +447,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_flat_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_flat_s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -470,76 +470,76 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_flat_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_flat_s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -553,191 +553,191 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_flat_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_flat_s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -751,19 +751,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -777,19 +777,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 8)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -803,19 +803,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -829,103 +829,103 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -939,205 +939,205 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1151,34 +1151,34 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s160_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; CI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_flat_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; VI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-LABEL: name: test_load_flat_s160_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; GFX9: S_NOP 0, implicit [[BITCAST]](s160)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 0)
     S_NOP 0, implicit %1
@@ -1192,61 +1192,61 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s224_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; CI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; CI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_flat_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-LABEL: name: test_load_flat_s224_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; GFX9: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -1263,19 +1263,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s128_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_flat_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_flat_s128_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1289,19 +1289,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s128_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_flat_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_flat_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1315,262 +1315,262 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s128_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_flat_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_flat_s128_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -1584,31 +1584,31 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s256_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; VI-LABEL: name: test_load_flat_s256_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; GFX9-LABEL: name: test_load_flat_s256_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -1622,16 +1622,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_flat_p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_flat_p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -1645,16 +1645,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_flat_p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_flat_p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -1668,191 +1668,191 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_flat_p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_flat_p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -1866,16 +1866,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
-    ; CI: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; VI-LABEL: name: test_load_flat_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
-    ; VI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-LABEL: name: test_load_flat_p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
      %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -1889,16 +1889,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p4_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_flat_p4_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-LABEL: name: test_load_flat_p4_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -1912,16 +1912,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_flat_p4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-LABEL: name: test_load_flat_p4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -1935,76 +1935,76 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p4_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_flat_p4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-LABEL: name: test_load_flat_p4_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -2018,191 +2018,191 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p4_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_flat_p4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-LABEL: name: test_load_flat_p4_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -2216,16 +2216,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p5_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
-    ; CI: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; VI-LABEL: name: test_load_flat_p5_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
-    ; VI: $vgpr0 = COPY [[LOAD]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-LABEL: name: test_load_flat_p5_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -2239,46 +2239,46 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p5_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_flat_p5_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_flat_p5_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 0)
     $vgpr0 = COPY %1
@@ -2292,88 +2292,88 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_p5_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_flat_p5_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_flat_p5_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 0)
     $vgpr0 = COPY %1
@@ -2387,50 +2387,50 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 0)
     %2:_(s16) = G_BITCAST %1
@@ -2446,50 +2446,50 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 0)
     %2:_(s16) = G_BITCAST %1
@@ -2505,53 +2505,53 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 0)
     %2:_(s16) = G_BITCAST %1
@@ -2567,87 +2567,87 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 0)
     %2:_(s24) = G_BITCAST %1
@@ -2663,96 +2663,96 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 0)
     %2:_(s24) = G_BITCAST %1
@@ -2768,67 +2768,67 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 0)
     %2:_(s32) = G_BITCAST %1
@@ -2843,73 +2843,73 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 0)
     %2:_(s32) = G_BITCAST %1
@@ -2924,85 +2924,85 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 0)
     %2:_(s32) = G_BITCAST %1
@@ -3017,112 +3017,112 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_flat_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 0)
     %2:_(<2 x s32>) = G_BITCAST %1
@@ -3137,190 +3137,190 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_flat_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 0)
     %2:_(<4 x s32>) = G_BITCAST %1
@@ -3335,358 +3335,358 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v32s8_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
-    ; CI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
-    ; CI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
-    ; CI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
-    ; CI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; CI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
-    ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
-    ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
-    ; CI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; CI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
-    ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
-    ; CI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; CI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
-    ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
-    ; CI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; CI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
-    ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
-    ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
-    ; CI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; CI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
-    ; CI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
-    ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
-    ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
-    ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
+    ; CI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
+    ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; CI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; CI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
+    ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; CI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
+    ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; CI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; CI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
+    ; CI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
+    ; CI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; CI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; CI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
+    ; CI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; CI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; CI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
+    ; CI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; CI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; VI-LABEL: name: test_load_flat_v32s8_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
-    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
-    ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
-    ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
-    ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
-    ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
-    ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
-    ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
-    ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
-    ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
-    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
-    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
-    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
-    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
+    ; VI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
+    ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; VI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
+    ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; VI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
+    ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; VI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; VI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
+    ; VI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
+    ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; VI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; VI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
+    ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; VI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
+    ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v32s8_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
-    ; GFX9: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; GFX9: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
-    ; GFX9: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; GFX9: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
-    ; GFX9: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; GFX9: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
-    ; GFX9: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; GFX9: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
-    ; GFX9: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
-    ; GFX9: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
-    ; GFX9: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
-    ; GFX9: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
-    ; GFX9: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; GFX9: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
-    ; GFX9: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
-    ; GFX9: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; GFX9: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
-    ; GFX9: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
-    ; GFX9: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
-    ; GFX9: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; GFX9: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
-    ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
-    ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; GFX9: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
-    ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
-    ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C4]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C4]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C4]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C4]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C4]]
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C4]]
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
+    ; GFX9-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C4]]
+    ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C4]]
+    ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; GFX9-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C4]]
+    ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; GFX9-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C4]]
+    ; GFX9-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C4]]
+    ; GFX9-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; GFX9-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C4]]
+    ; GFX9-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; GFX9-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C4]]
+    ; GFX9-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 0)
     %2:_(<8 x s32>) = G_BITCAST %1
@@ -3702,16 +3702,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
-    ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; VI-LABEL: name: test_load_flat_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
-    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v2s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
-    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>))
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -3725,40 +3725,40 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_flat_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v2s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 0)
     $vgpr0 = COPY %1
@@ -3772,99 +3772,99 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_flat_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v2s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 0)
     $vgpr0 = COPY %1
@@ -3878,84 +3878,84 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -3971,90 +3971,90 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4070,90 +4070,90 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 0)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4169,177 +4169,177 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 0)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -4355,16 +4355,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v4s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4378,16 +4378,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v4s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4401,72 +4401,72 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v4s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4480,180 +4480,180 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v4s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4667,19 +4667,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v8s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; VI-LABEL: name: test_load_flat_v8s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v8s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4693,16 +4693,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_flat_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v2s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4716,16 +4716,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_flat_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v2s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4740,16 +4740,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_flat_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v2s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -4763,16 +4763,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_flat_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -4788,16 +4788,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_flat_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -4811,16 +4811,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_flat_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v4s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4834,16 +4834,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_flat_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v4s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4857,16 +4857,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_flat_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4880,28 +4880,28 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v8s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; VI-LABEL: name: test_load_flat_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v8s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -4915,46 +4915,46 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v16s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; VI-LABEL: name: test_load_flat_v16s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v16s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<4 x s32>) from unknown-address + 32, align 32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<4 x s32>) from unknown-address + 48)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -4968,16 +4968,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_flat_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v2s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>))
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4991,16 +4991,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_flat_v2s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v2s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5014,16 +5014,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_flat_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5037,133 +5037,133 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_flat_v2s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v2s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5177,348 +5177,348 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_flat_v2s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v2s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5532,37 +5532,37 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
-    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
-    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5578,37 +5578,37 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
-    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
-    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5624,508 +5624,508 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; CI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -6141,28 +6141,28 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v4s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v4s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6176,28 +6176,28 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v4s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v4s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 8)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6211,659 +6211,659 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v4s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C11]](s64)
-    ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
-    ; CI: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
-    ; CI: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
-    ; CI: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
-    ; CI: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
-    ; CI: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
-    ; CI: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
-    ; CI: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
-    ; CI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; CI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
-    ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
-    ; CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
-    ; CI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
-    ; CI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; CI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
-    ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
-    ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
-    ; CI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
-    ; CI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; CI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
-    ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
-    ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
-    ; CI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
-    ; CI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; CI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
-    ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
-    ; CI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
-    ; CI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
-    ; CI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; CI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; CI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
-    ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; CI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; CI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
-    ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; CI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C11]](s64)
+    ; CI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
+    ; CI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; CI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
+    ; CI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
+    ; CI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
+    ; CI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; CI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
+    ; CI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; CI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
+    ; CI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; CI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
+    ; CI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; CI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
+    ; CI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; CI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; CI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
+    ; CI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
+    ; CI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
+    ; CI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
+    ; CI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; CI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; CI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
+    ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
+    ; CI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
+    ; CI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
+    ; CI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; CI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; CI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
+    ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
+    ; CI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
+    ; CI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
+    ; CI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; CI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; CI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
+    ; CI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
+    ; CI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
+    ; CI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
+    ; CI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; CI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; CI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
+    ; CI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; CI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; CI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; CI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
+    ; CI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v4s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64)
-    ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
-    ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; VI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; VI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
+    ; VI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; VI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; VI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; VI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; VI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; VI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; VI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; VI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; VI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; VI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; VI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; VI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; VI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; VI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; VI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; VI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; VI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; VI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; VI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; VI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; VI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; VI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; VI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; VI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v4s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
-    ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64)
-    ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
-    ; GFX9: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
-    ; GFX9: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
-    ; GFX9: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
-    ; GFX9: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
-    ; GFX9: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
-    ; GFX9: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
-    ; GFX9: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
-    ; GFX9: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; GFX9: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; GFX9: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; GFX9: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; GFX9: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; GFX9: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; GFX9: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; GFX9: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; GFX9: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; GFX9: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; GFX9: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; GFX9: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; GFX9: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; GFX9: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; GFX9: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; GFX9: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; GFX9: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; GFX9: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; GFX9: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; GFX9: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; GFX9: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19)
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23)
+    ; GFX9-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64)
+    ; GFX9-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24)
+    ; GFX9-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25)
+    ; GFX9-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26)
+    ; GFX9-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27)
+    ; GFX9-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; GFX9-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28)
+    ; GFX9-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; GFX9-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29)
+    ; GFX9-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; GFX9-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30)
+    ; GFX9-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; GFX9-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31)
+    ; GFX9-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; GFX9-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; GFX9-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; GFX9-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; GFX9-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; GFX9-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; GFX9-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; GFX9-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; GFX9-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; GFX9-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; GFX9-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; GFX9-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; GFX9-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; GFX9-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; GFX9-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6877,31 +6877,31 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2s128_align32
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; VI-LABEL: name: test_load_flat_v2s128_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; GFX9-LABEL: name: test_load_flat_v2s128_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -6915,19 +6915,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p1_align16
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_flat_v2p1_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_flat_v2p1_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -6941,19 +6941,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_flat_v2p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_flat_v2p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -6967,19 +6967,19 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_flat_v2p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_flat_v2p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -6993,262 +6993,262 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_flat_v2p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_flat_v2p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7262,16 +7262,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_flat_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_flat_v2p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7285,16 +7285,16 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_flat_v2p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_flat_v2p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7308,151 +7308,151 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v2p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; VI-LABEL: name: test_load_flat_v2p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_flat_v2p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7466,16 +7466,16 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s32_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_flat_s32_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_flat_s32_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -7489,16 +7489,16 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s32_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_flat_s32_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_flat_s32_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 0)
     $vgpr0 = COPY %1
@@ -7513,19 +7513,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7539,19 +7539,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7565,19 +7565,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7591,28 +7591,28 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7626,19 +7626,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -7652,19 +7652,19 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index a9cd9a8295674..53c45177fd6bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -31,40 +31,40 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-HSA: $vgpr0 = COPY [[AND]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-MESA: $vgpr0 = COPY [[AND]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_global_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 1)
     %2:_(s32) = G_ZEXT %1
@@ -79,40 +79,40 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s2_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-HSA: $vgpr0 = COPY [[AND]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s2_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-MESA: $vgpr0 = COPY [[AND]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_global_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s2_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s2_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 1)
     %2:_(s32) = G_ZEXT %1
@@ -132,28 +132,28 @@ body: |
     ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; SI-LABEL: name: test_load_global_s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -168,28 +168,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -204,28 +204,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -240,28 +240,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -298,78 +298,78 @@ body: |
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; SI-LABEL: name: test_load_global_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -384,28 +384,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s32_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s32_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s32_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s32_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -419,64 +419,64 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: $vgpr0 = COPY [[OR]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s32_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s32_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: $vgpr0 = COPY [[OR]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; VI-LABEL: name: test_load_global_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: $vgpr0 = COPY [[OR]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s32_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s32_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 1)
     $vgpr0 = COPY %1
@@ -490,120 +490,120 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s32_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s32_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_global_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s32_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s32_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -617,28 +617,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -653,28 +653,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_global_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -689,144 +689,144 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_global_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -841,148 +841,148 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_global_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -1004,40 +1004,40 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; SI-LABEL: name: test_load_global_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s48_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s48_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_global_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s48_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s48_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 1)
     %2:_(s64) = G_ZEXT %1
@@ -1052,28 +1052,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s64_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s64_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_global_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s64_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s64_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -1087,28 +1087,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s64_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s64_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_global_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s64_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s64_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -1122,108 +1122,108 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s64_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s64_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_global_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s64_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s64_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -1237,264 +1237,264 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s64_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s64_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_global_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s64_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s64_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -1508,35 +1508,35 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-MESA-LABEL: name: test_load_global_s96_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-MESA-LABEL: name: test_load_global_s96_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1550,39 +1550,39 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-MESA-LABEL: name: test_load_global_s96_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-MESA-LABEL: name: test_load_global_s96_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1596,39 +1596,39 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-MESA-LABEL: name: test_load_global_s96_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-MESA-LABEL: name: test_load_global_s96_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1642,146 +1642,146 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-MESA-LABEL: name: test_load_global_s96_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-MESA-LABEL: name: test_load_global_s96_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1795,282 +1795,282 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-MESA-LABEL: name: test_load_global_s96_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-MESA-LABEL: name: test_load_global_s96_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2089,64 +2089,64 @@ body: |
     ; CI: S_NOP 0, implicit [[TRUNC]](s160)
     ; SI-LABEL: name: test_load_global_s160_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; SI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; SI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; CI-HSA-LABEL: name: test_load_global_s160_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; CI-HSA: S_NOP 0, implicit [[BITCAST]](s160)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; CI-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; CI-MESA-LABEL: name: test_load_global_s160_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; CI-MESA: S_NOP 0, implicit [[BITCAST]](s160)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; CI-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_global_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; VI: S_NOP 0, implicit [[BITCAST]](s160)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-HSA-LABEL: name: test_load_global_s160_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; GFX9-HSA: S_NOP 0, implicit [[BITCAST]](s160)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; GFX9-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-MESA-LABEL: name: test_load_global_s160_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
-    ; GFX9-MESA: S_NOP 0, implicit [[BITCAST]](s160)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
+    ; GFX9-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 1)
     S_NOP 0, implicit %1
@@ -2160,121 +2160,121 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s224_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; SI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; SI: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>)
-    ; SI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>)
+    ; SI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; CI-HSA-LABEL: name: test_load_global_s224_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; CI-HSA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; CI-HSA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; CI-MESA-LABEL: name: test_load_global_s224_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; CI-MESA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; CI-MESA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_global_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-HSA-LABEL: name: test_load_global_s224_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; GFX9-HSA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-MESA-LABEL: name: test_load_global_s224_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
-    ; GFX9-MESA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -2291,34 +2291,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s128_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-HSA-LABEL: name: test_load_global_s128_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-MESA-LABEL: name: test_load_global_s128_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_global_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-HSA-LABEL: name: test_load_global_s128_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-MESA-LABEL: name: test_load_global_s128_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2336,34 +2336,34 @@ body: |
     ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; SI-LABEL: name: test_load_global_s128_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-HSA-LABEL: name: test_load_global_s128_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-MESA-LABEL: name: test_load_global_s128_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_global_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-HSA-LABEL: name: test_load_global_s128_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-MESA-LABEL: name: test_load_global_s128_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2377,358 +2377,358 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s128_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-HSA-LABEL: name: test_load_global_s128_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-MESA-LABEL: name: test_load_global_s128_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_global_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-HSA-LABEL: name: test_load_global_s128_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-MESA-LABEL: name: test_load_global_s128_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2742,34 +2742,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s256_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; CI-HSA-LABEL: name: test_load_global_s256_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; CI-MESA-LABEL: name: test_load_global_s256_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; VI-LABEL: name: test_load_global_s256_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; GFX9-HSA-LABEL: name: test_load_global_s256_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     ; GFX9-MESA-LABEL: name: test_load_global_s256_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -2783,28 +2783,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-HSA-LABEL: name: test_load_global_p1_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-MESA-LABEL: name: test_load_global_p1_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_global_p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-HSA-LABEL: name: test_load_global_p1_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-MESA-LABEL: name: test_load_global_p1_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -2818,28 +2818,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-HSA-LABEL: name: test_load_global_p1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-MESA-LABEL: name: test_load_global_p1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_global_p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-HSA-LABEL: name: test_load_global_p1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-MESA-LABEL: name: test_load_global_p1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -2853,264 +2853,264 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-HSA-LABEL: name: test_load_global_p1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-MESA-LABEL: name: test_load_global_p1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_global_p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-HSA-LABEL: name: test_load_global_p1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-MESA-LABEL: name: test_load_global_p1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -3124,28 +3124,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; CI-HSA-LABEL: name: test_load_global_p3_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; CI-MESA-LABEL: name: test_load_global_p3_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; VI-LABEL: name: test_load_global_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-HSA-LABEL: name: test_load_global_p3_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-MESA-LABEL: name: test_load_global_p3_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p3)
      %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -3163,28 +3163,28 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; SI-LABEL: name: test_load_global_p4_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-HSA-LABEL: name: test_load_global_p4_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-MESA-LABEL: name: test_load_global_p4_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_global_p4_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-HSA-LABEL: name: test_load_global_p4_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-MESA-LABEL: name: test_load_global_p4_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -3198,28 +3198,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-HSA-LABEL: name: test_load_global_p4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-MESA-LABEL: name: test_load_global_p4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; VI-LABEL: name: test_load_global_p4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-HSA-LABEL: name: test_load_global_p4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-MESA-LABEL: name: test_load_global_p4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -3233,108 +3233,108 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p4_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; CI-HSA-LABEL: name: test_load_global_p4_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-MESA-LABEL: name: test_load_global_p4_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_global_p4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-HSA-LABEL: name: test_load_global_p4_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-MESA-LABEL: name: test_load_global_p4_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -3348,264 +3348,264 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p4_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; CI-HSA-LABEL: name: test_load_global_p4_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; CI-MESA-LABEL: name: test_load_global_p4_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; VI-LABEL: name: test_load_global_p4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     ; GFX9-HSA-LABEL: name: test_load_global_p4_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; GFX9-MESA-LABEL: name: test_load_global_p4_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -3619,28 +3619,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p5_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-HSA-LABEL: name: test_load_global_p5_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-MESA-LABEL: name: test_load_global_p5_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; VI-LABEL: name: test_load_global_p5_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-HSA-LABEL: name: test_load_global_p5_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-MESA-LABEL: name: test_load_global_p5_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -3654,68 +3654,68 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p5_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-HSA-LABEL: name: test_load_global_p5_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-MESA-LABEL: name: test_load_global_p5_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_global_p5_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-HSA-LABEL: name: test_load_global_p5_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-MESA-LABEL: name: test_load_global_p5_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 1)
     $vgpr0 = COPY %1
@@ -3729,124 +3729,124 @@ body: |
 
     ; SI-LABEL: name: test_load_global_p5_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-HSA-LABEL: name: test_load_global_p5_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-MESA-LABEL: name: test_load_global_p5_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_global_p5_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-HSA-LABEL: name: test_load_global_p5_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-MESA-LABEL: name: test_load_global_p5_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -3860,97 +3860,97 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 1)
     %2:_(s16) = G_BITCAST %1
@@ -3966,97 +3966,97 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 1)
     %2:_(s16) = G_BITCAST %1
@@ -4072,101 +4072,101 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 1)
     %2:_(s16) = G_BITCAST %1
@@ -4182,172 +4182,172 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v3s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-HSA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v3s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_global_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 1)
     %2:_(s24) = G_BITCAST %1
@@ -4364,194 +4364,194 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v3s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI-HSA: $vgpr0 = COPY [[OR3]](s32)
+    ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v3s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_global_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
-    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
-    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
-    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
-    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
-    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
-    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR3]](s32)
+    ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
+    ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; GFX9-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-HSA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 1)
     %2:_(s24) = G_BITCAST %1
@@ -4567,80 +4567,80 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-HSA-LABEL: name: test_load_global_v4s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-MESA-LABEL: name: test_load_global_v4s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; VI-LABEL: name: test_load_global_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -4673,80 +4673,80 @@ body: |
     ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; SI-LABEL: name: test_load_global_v4s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-HSA-LABEL: name: test_load_global_v4s8_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-MESA-LABEL: name: test_load_global_v4s8_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; VI-LABEL: name: test_load_global_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LOAD1]](s32), [[LSHR1]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD1]](s32), [[LSHR1]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD1]](s32), [[LSHR1]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 1)
     $vgpr0 = COPY %1
@@ -4760,92 +4760,92 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-HSA-LABEL: name: test_load_global_v4s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32)
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; CI-MESA-LABEL: name: test_load_global_v4s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; VI-LABEL: name: test_load_global_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -4859,220 +4859,220 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v8s8_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v8s8_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v8s8_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 1)
     %2:_(<2 x s32>) = G_BITCAST %1
@@ -5087,376 +5087,376 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v16s8_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v16s8_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CI-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CI-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; CI-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CI-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; CI-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; CI-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; CI-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-HSA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CI-HSA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-HSA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-HSA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-HSA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CI-HSA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-HSA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-HSA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v16s8_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_global_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-HSA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-HSA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-HSA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-HSA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-HSA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-HSA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 1)
     %2:_(<4 x s32>) = G_BITCAST %1
@@ -5471,688 +5471,688 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v32s8_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; SI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; SI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; SI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; SI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; SI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; SI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; SI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; SI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; SI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; SI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; SI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; SI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; SI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; SI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; SI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; SI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v32s8_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CI-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CI-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; CI-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CI-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; CI-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; CI-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; CI-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-HSA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; CI-HSA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; CI-HSA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; CI-HSA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-HSA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; CI-HSA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; CI-HSA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-HSA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; CI-HSA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; CI-HSA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-HSA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; CI-HSA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; CI-HSA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; CI-HSA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-HSA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; CI-HSA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; CI-HSA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-HSA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; CI-HSA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; CI-HSA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-HSA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; CI-HSA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; CI-HSA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; CI-HSA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; CI-HSA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; CI-HSA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; CI-HSA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; CI-HSA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; CI-HSA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; CI-HSA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; CI-HSA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; CI-HSA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; CI-HSA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; CI-HSA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; CI-HSA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; CI-HSA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; CI-HSA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; CI-HSA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; CI-HSA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; CI-HSA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; CI-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-HSA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CI-HSA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-HSA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-HSA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-HSA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CI-HSA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-HSA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-HSA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-HSA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; CI-HSA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-HSA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-HSA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-HSA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; CI-HSA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-HSA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-HSA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-HSA-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; CI-HSA-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; CI-HSA-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; CI-HSA-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; CI-HSA-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; CI-HSA-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; CI-HSA-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; CI-HSA-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v32s8_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; CI-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; CI-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; CI-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; CI-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; CI-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; CI-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; CI-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-MESA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-MESA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; CI-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-MESA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-MESA-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; CI-MESA-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; CI-MESA-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; CI-MESA-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; CI-MESA-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; CI-MESA-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; CI-MESA-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; CI-MESA-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; VI-LABEL: name: test_load_global_v32s8_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; VI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; VI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; VI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; VI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; VI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; VI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; VI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; VI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; VI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; VI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-HSA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; GFX9-HSA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; GFX9-HSA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; GFX9-HSA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-HSA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; GFX9-HSA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; GFX9-HSA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9-HSA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; GFX9-HSA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; GFX9-HSA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9-HSA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; GFX9-HSA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; GFX9-HSA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; GFX9-HSA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9-HSA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; GFX9-HSA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; GFX9-HSA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9-HSA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; GFX9-HSA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; GFX9-HSA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9-HSA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; GFX9-HSA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; GFX9-HSA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; GFX9-HSA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9-HSA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; GFX9-HSA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; GFX9-HSA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; GFX9-HSA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; GFX9-HSA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; GFX9-HSA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; GFX9-HSA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; GFX9-HSA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; GFX9-HSA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; GFX9-HSA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; GFX9-HSA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; GFX9-HSA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; GFX9-HSA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; GFX9-HSA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; GFX9-HSA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; GFX9-HSA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-HSA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-HSA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-HSA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-HSA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-HSA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-HSA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-HSA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-HSA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-HSA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-HSA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-HSA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-HSA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-HSA-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-HSA-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; GFX9-HSA-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; GFX9-HSA-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; GFX9-HSA-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; GFX9-HSA-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; GFX9-HSA-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; GFX9-HSA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
-    ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
-    ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
-    ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
-    ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
-    ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
-    ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
-    ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
-    ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
-    ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
-    ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
-    ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
-    ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
-    ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
-    ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
-    ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
-    ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
-    ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
-    ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
-    ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
-    ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
-    ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
-    ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
-    ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
-    ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-MESA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-MESA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-MESA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-MESA-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-MESA-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; GFX9-MESA-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; GFX9-MESA-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
+    ; GFX9-MESA-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; GFX9-MESA-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 1)
     %2:_(<8 x s32>) = G_BITCAST %1
@@ -6168,28 +6168,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v2s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v2s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -6203,62 +6203,62 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v2s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v2s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1)
     $vgpr0 = COPY %1
@@ -6309,141 +6309,141 @@ body: |
     ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
     ; SI-LABEL: name: test_load_global_v2s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v2s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v2s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -6457,164 +6457,164 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -6636,176 +6636,176 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; SI-LABEL: name: test_load_global_v3s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -6821,176 +6821,176 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 1)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -7006,293 +7006,293 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 1)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -7308,28 +7308,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v4s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v4s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -7343,28 +7343,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v4s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v4s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -7378,106 +7378,106 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v4s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v4s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -7491,251 +7491,251 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v4s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v4s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -7749,176 +7749,176 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-HSA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-HSA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -7938,200 +7938,200 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8151,200 +8151,200 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8364,204 +8364,204 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 2, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8581,387 +8581,387 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CI-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 1, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8981,35 +8981,35 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v6s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9023,39 +9023,39 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v6s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9069,39 +9069,39 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v6s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9115,146 +9115,146 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v6s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 2, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9268,282 +9268,282 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v6s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9557,216 +9557,216 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI-NEXT: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI-HSA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-HSA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI-NEXT: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX9-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-HSA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -9787,268 +9787,268 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10069,268 +10069,268 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10351,260 +10351,260 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 2, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10625,513 +10625,513 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
-    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
-    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
-    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
-    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
-    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
-    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
-    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
+    ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
+    ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
+    ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
+    ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
+    ; CI-MESA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CI-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CI-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
-    ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
-    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD6]](s32), [[BITCAST]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
-    ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
-    ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
-    ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
-    ; GFX9-MESA: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32)
-    ; GFX9-MESA: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[BITCAST]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
+    ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
+    ; GFX9-MESA-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32)
+    ; GFX9-MESA-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[BITCAST]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 1, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -11152,34 +11152,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v8s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v8s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v8s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; VI-LABEL: name: test_load_global_v8s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11193,34 +11193,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v8s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v8s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v8s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; VI-LABEL: name: test_load_global_v8s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11234,28 +11234,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v2s32_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v2s32_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -11269,28 +11269,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v2s32_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v2s32_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -11304,104 +11304,104 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v2s32_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v2s32_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v2s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -11415,200 +11415,200 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v2s32_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v2s32_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -11622,29 +11622,29 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v3s32_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v3s32_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_global_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -11658,33 +11658,33 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v3s32_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v3s32_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_load_global_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -11698,28 +11698,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v4s32_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v4s32_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_global_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11733,28 +11733,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v4s32_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v4s32_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_global_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11768,28 +11768,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v4s32_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v4s32_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_global_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11803,28 +11803,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v8s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v8s32_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v8s32_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; VI-LABEL: name: test_load_global_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s32_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s32_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -11888,28 +11888,28 @@ body: |
     ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     ; SI-LABEL: name: test_load_global_v16s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v16s32_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v16s32_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; VI-LABEL: name: test_load_global_v16s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v16s32_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v16s32_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -11923,28 +11923,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s64_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v2s64_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v2s64_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_global_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11958,28 +11958,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v2s64_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v2s64_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_global_v2s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11993,28 +11993,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v2s64_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v2s64_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; VI-LABEL: name: test_load_global_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12028,184 +12028,184 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v2s64_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v2s64_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_global_v2s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12219,476 +12219,476 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v2s64_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v2s64_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_global_v2s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12702,34 +12702,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2sp1_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v2sp1_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v2sp1_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_global_v2sp1_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2sp1_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2sp1_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12743,46 +12743,46 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; CI-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; CI-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -12798,70 +12798,70 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -12877,706 +12877,706 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; SI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; SI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; SI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; SI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; SI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; SI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; SI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-MESA-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-MESA-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-MESA-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-MESA-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-MESA-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-MESA-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-MESA-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-MESA-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-MESA-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-MESA-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-MESA-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-MESA-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-MESA-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-MESA-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-MESA-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-MESA-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-MESA-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-MESA-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-MESA-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-MESA-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -13592,28 +13592,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v4s64_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v4s64_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v4s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -13627,28 +13627,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v4s64_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v4s64_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v4s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -13662,892 +13662,892 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; SI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; SI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
-    ; SI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; SI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
-    ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; SI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; SI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
-    ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; SI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; SI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
-    ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; SI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
-    ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; SI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
-    ; SI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
-    ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
-    ; SI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
-    ; SI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
-    ; SI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; SI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
-    ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
-    ; SI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
-    ; SI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
-    ; SI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; SI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
-    ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
-    ; SI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
-    ; SI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
-    ; SI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; SI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
-    ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
-    ; SI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
-    ; SI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
-    ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; SI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
-    ; SI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; SI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
-    ; SI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; SI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; SI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; SI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; SI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; SI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; SI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; SI-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
+    ; SI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; SI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; SI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
+    ; SI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
+    ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
+    ; SI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
+    ; SI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
+    ; SI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; SI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
+    ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
+    ; SI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
+    ; SI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
+    ; SI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; SI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
+    ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
+    ; SI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
+    ; SI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
+    ; SI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; SI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
+    ; SI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
+    ; SI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
+    ; SI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
+    ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; SI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
+    ; SI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; SI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
+    ; SI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v4s64_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v4s64_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
-    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
-    ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
-    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
-    ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
-    ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
-    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
-    ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
-    ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
-    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
-    ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
-    ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
-    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
-    ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
-    ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
-    ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
-    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
-    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; CI-MESA: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
-    ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
-    ; CI-MESA: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
-    ; CI-MESA: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; CI-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
-    ; CI-MESA: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; CI-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
-    ; CI-MESA: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; CI-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
-    ; CI-MESA: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; CI-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
-    ; CI-MESA: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; CI-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
-    ; CI-MESA: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; CI-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
-    ; CI-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; CI-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
-    ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
-    ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
-    ; CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
-    ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
-    ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
-    ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
-    ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
-    ; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
-    ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
-    ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
-    ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
-    ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
-    ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
-    ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
-    ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
-    ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
-    ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
-    ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY4]](s32)
+    ; CI-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY5]](s32)
+    ; CI-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY6]](s32)
+    ; CI-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY7]](s32)
+    ; CI-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; CI-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; CI-MESA-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY8]](s32)
+    ; CI-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
+    ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]]
+    ; CI-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; CI-MESA-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY9]](s32)
+    ; CI-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
+    ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]]
+    ; CI-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; CI-MESA-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY10]](s32)
+    ; CI-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
+    ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]]
+    ; CI-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; CI-MESA-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY11]](s32)
+    ; CI-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
+    ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]]
+    ; CI-MESA-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; CI-MESA-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; CI-MESA-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; CI-MESA-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; CI-MESA-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CI-MESA-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; CI-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
+    ; CI-MESA-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; CI-MESA-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
+    ; CI-MESA-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; CI-MESA-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LOAD25]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY12]](s32)
+    ; CI-MESA-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32)
+    ; CI-MESA-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]]
+    ; CI-MESA-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; CI-MESA-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LOAD27]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY13]](s32)
+    ; CI-MESA-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32)
+    ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]]
+    ; CI-MESA-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; CI-MESA-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LOAD29]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY14]](s32)
+    ; CI-MESA-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32)
+    ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]]
+    ; CI-MESA-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; CI-MESA-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; CI-MESA-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LOAD31]], [[C9]]
+    ; CI-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY15]](s32)
+    ; CI-MESA-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32)
+    ; CI-MESA-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]]
+    ; CI-MESA-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; CI-MESA-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; CI-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; CI-MESA-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; CI-MESA-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; CI-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; CI-MESA-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v4s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
-    ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; VI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; VI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; VI-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; VI-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; VI-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; VI-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; VI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; VI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; VI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; VI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; VI-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; VI-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; VI-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
+    ; VI-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; VI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; VI-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; VI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; VI-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; VI-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; VI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; VI-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; VI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; VI-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; VI-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; VI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; VI-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; VI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; VI-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; VI-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; VI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; VI-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; VI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; VI-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; VI-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; VI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; VI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; VI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; VI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; VI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; VI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; VI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; VI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
-    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
-    ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
-    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
-    ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
-    ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
-    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
-    ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
-    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
-    ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
-    ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
-    ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
-    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
-    ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
-    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
-    ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
-    ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
-    ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
-    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
-    ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
-    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
-    ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
-    ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
-    ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
-    ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
-    ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
-    ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
-    ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
-    ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
-    ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
-    ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
-    ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; GFX9-MESA: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
-    ; GFX9-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
-    ; GFX9-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
-    ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
-    ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
-    ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
-    ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
-    ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
-    ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
-    ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
-    ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
-    ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
-    ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
-    ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
-    ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
-    ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
-    ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
-    ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
-    ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
-    ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
-    ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
-    ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
-    ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
-    ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
-    ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
-    ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
-    ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
-    ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
-    ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
-    ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
-    ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
-    ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
-    ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
-    ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
-    ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32)
+    ; GFX9-MESA-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32)
+    ; GFX9-MESA-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-MESA-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32)
+    ; GFX9-MESA-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32)
+    ; GFX9-MESA-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]]
+    ; GFX9-MESA-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32)
+    ; GFX9-MESA-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32)
+    ; GFX9-MESA-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]]
+    ; GFX9-MESA-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32)
+    ; GFX9-MESA-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32)
+    ; GFX9-MESA-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]]
+    ; GFX9-MESA-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16)
+    ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]]
+    ; GFX9-MESA-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16)
+    ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
+    ; GFX9-MESA-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; GFX9-MESA-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; GFX9-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1)
+    ; GFX9-MESA-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32)
+    ; GFX9-MESA-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32)
+    ; GFX9-MESA-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]]
+    ; GFX9-MESA-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32)
+    ; GFX9-MESA-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32)
+    ; GFX9-MESA-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]]
+    ; GFX9-MESA-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32)
+    ; GFX9-MESA-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32)
+    ; GFX9-MESA-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]]
+    ; GFX9-MESA-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32)
+    ; GFX9-MESA-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]]
+    ; GFX9-MESA-NEXT: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32)
+    ; GFX9-MESA-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]]
+    ; GFX9-MESA-NEXT: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16)
+    ; GFX9-MESA-NEXT: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]]
+    ; GFX9-MESA-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16)
+    ; GFX9-MESA-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]]
+    ; GFX9-MESA-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16)
+    ; GFX9-MESA-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]]
+    ; GFX9-MESA-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -14561,34 +14561,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2s128_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; CI-HSA-LABEL: name: test_load_global_v2s128_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; CI-MESA-LABEL: name: test_load_global_v2s128_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; VI-LABEL: name: test_load_global_v2s128_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -14602,34 +14602,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p1_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v2p1_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v2p1_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_global_v2p1_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14643,34 +14643,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v2p1_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v2p1_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_global_v2p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14684,34 +14684,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v2p1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v2p1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_global_v2p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14725,358 +14725,358 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v2p1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v2p1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_global_v2p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -15090,34 +15090,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v4p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; CI-HSA-LABEL: name: test_load_global_v4p1_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; CI-MESA-LABEL: name: test_load_global_v4p1_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; VI-LABEL: name: test_load_global_v4p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v4p1_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v4p1_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -15131,28 +15131,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p3_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-HSA-LABEL: name: test_load_global_v2p3_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-MESA-LABEL: name: test_load_global_v2p3_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_global_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15166,28 +15166,28 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-HSA-LABEL: name: test_load_global_v2p3_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-MESA-LABEL: name: test_load_global_v2p3_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_global_v2p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15201,208 +15201,208 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v2p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; CI-HSA-LABEL: name: test_load_global_v2p3_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-MESA-LABEL: name: test_load_global_v2p3_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; VI-LABEL: name: test_load_global_v2p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15416,28 +15416,28 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -15451,28 +15451,28 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -15486,148 +15486,148 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s24), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -15640,144 +15640,144 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; CI-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-HSA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s24), align 2, addrspace 1)
     $vgpr0 = COPY %1
@@ -15791,28 +15791,28 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -15827,34 +15827,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15868,34 +15868,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15913,34 +15913,34 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15954,52 +15954,52 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -16013,34 +16013,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16054,34 +16054,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16095,28 +16095,28 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16130,28 +16130,28 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16165,28 +16165,28 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16200,28 +16200,28 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -16235,28 +16235,28 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -16270,536 +16270,536 @@ body: |
 
     ; SI-LABEL: name: test_global_v2s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; SI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; SI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; SI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; SI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; SI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_global_v2s96_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-MESA-LABEL: name: test_global_v2s96_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CI-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; CI-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; CI-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; CI-MESA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; CI-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-MESA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; CI-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; CI-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; CI-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; CI-MESA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; CI-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-MESA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; CI-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_global_v2s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-HSA-LABEL: name: test_global_v2s96_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-MESA-LABEL: name: test_global_v2s96_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
-    ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
-    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
-    ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
-    ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
-    ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
-    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-MESA-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-MESA-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-MESA-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-MESA-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX9-MESA-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-MESA-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-MESA-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX9-MESA-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16816,280 +16816,280 @@ body: |
 
     ; SI-LABEL: name: test_global_v2s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_global_v2s96_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-MESA-LABEL: name: test_global_v2s96_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; CI-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; CI-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; CI-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; CI-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; CI-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; CI-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; CI-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_global_v2s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-HSA-LABEL: name: test_global_v2s96_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-MESA-LABEL: name: test_global_v2s96_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
-    ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
-    ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-MESA-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1)
+    ; GFX9-MESA-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1)
+    ; GFX9-MESA-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX9-MESA-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -17106,85 +17106,85 @@ body: |
 
     ; SI-LABEL: name: test_global_v2s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32) from unknown-address + 20, addrspace 1)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_global_v2s96_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-MESA-LABEL: name: test_global_v2s96_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_global_v2s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-HSA-LABEL: name: test_global_v2s96_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-MESA-LABEL: name: test_global_v2s96_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -17201,82 +17201,82 @@ body: |
 
     ; SI-LABEL: name: test_global_v2s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_global_v2s96_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-MESA-LABEL: name: test_global_v2s96_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_global_v2s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-HSA-LABEL: name: test_global_v2s96_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-MESA-LABEL: name: test_global_v2s96_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -17293,444 +17293,444 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v32s1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; SI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; SI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; SI: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; SI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; SI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; SI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; SI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; SI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; SI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; SI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; SI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; SI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; SI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; SI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; SI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; SI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; SI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; SI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; SI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; SI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; SI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; SI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; SI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; SI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; SI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; SI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; SI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; SI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; SI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; SI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; SI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; SI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; SI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; SI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; SI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; SI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; SI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; SI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; SI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; SI-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; SI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; SI-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; SI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; SI-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; SI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; SI-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
+    ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     ; CI-HSA-LABEL: name: test_load_global_v32s1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; CI-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; CI-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; CI-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; CI-HSA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; CI-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; CI-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; CI-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; CI-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; CI-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; CI-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; CI-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; CI-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; CI-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; CI-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; CI-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; CI-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; CI-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; CI-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; CI-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; CI-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; CI-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; CI-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI-HSA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; CI-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; CI-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; CI-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; CI-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; CI-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CI-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
-    ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CI-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; CI-HSA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CI-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; CI-HSA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CI-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; CI-HSA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; CI-HSA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CI-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; CI-HSA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CI-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; CI-HSA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CI-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; CI-HSA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; CI-HSA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; CI-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; CI-HSA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; CI-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; CI-HSA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; CI-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; CI-HSA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; CI-HSA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; CI-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; CI-HSA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; CI-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; CI-HSA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; CI-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; CI-HSA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; CI-HSA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CI-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; CI-HSA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; CI-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; CI-HSA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; CI-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; CI-HSA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; CI-HSA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; CI-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; CI-HSA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CI-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; CI-HSA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CI-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     ; CI-MESA-LABEL: name: test_load_global_v32s1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; CI-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; CI-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; CI-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; CI-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; CI-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; CI-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; CI-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; CI-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; CI-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; CI-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; CI-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; CI-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; CI-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; CI-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; CI-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; CI-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; CI-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; CI-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; CI-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; CI-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; CI-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; CI-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; CI-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CI-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
-    ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CI-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CI-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CI-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; CI-MESA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; CI-MESA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CI-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; CI-MESA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CI-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; CI-MESA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CI-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; CI-MESA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; CI-MESA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; CI-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; CI-MESA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; CI-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; CI-MESA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; CI-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; CI-MESA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; CI-MESA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; CI-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; CI-MESA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; CI-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; CI-MESA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; CI-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; CI-MESA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; CI-MESA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CI-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; CI-MESA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; CI-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; CI-MESA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; CI-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; CI-MESA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; CI-MESA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; CI-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; CI-MESA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CI-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; CI-MESA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CI-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     ; VI-LABEL: name: test_load_global_v32s1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; VI: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; VI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; VI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; VI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; VI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; VI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; VI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; VI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; VI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; VI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; VI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; VI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; VI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; VI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; VI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; VI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; VI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; VI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; VI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; VI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; VI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; VI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; VI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; VI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; VI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; VI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; VI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; VI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; VI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; VI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; VI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; VI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; VI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; VI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; VI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; VI-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; VI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; VI-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; VI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; VI-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; VI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; VI-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
+    ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; GFX9-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; GFX9-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; GFX9-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; GFX9-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; GFX9-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; GFX9-HSA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; GFX9-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; GFX9-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; GFX9-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; GFX9-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; GFX9-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; GFX9-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; GFX9-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; GFX9-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; GFX9-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; GFX9-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; GFX9-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; GFX9-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; GFX9-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; GFX9-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; GFX9-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; GFX9-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; GFX9-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; GFX9-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9-HSA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; GFX9-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; GFX9-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; GFX9-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; GFX9-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; GFX9-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; GFX9-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR9]](s32), [[LSHR10]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR11]](s32), [[LSHR12]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR15]](s32), [[LSHR16]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR17]](s32), [[LSHR18]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR21]](s32), [[LSHR22]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR23]](s32), [[LSHR24]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR25]](s32), [[LSHR26]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR27]](s32), [[LSHR28]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR29]](s32), [[LSHR30]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; GFX9-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; GFX9-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; GFX9-HSA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; GFX9-HSA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; GFX9-HSA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; GFX9-HSA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; GFX9-HSA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; GFX9-HSA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-HSA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; GFX9-HSA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; GFX9-HSA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; GFX9-HSA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; GFX9-HSA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; GFX9-HSA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX9-HSA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; GFX9-HSA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; GFX9-HSA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; GFX9-HSA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; GFX9-HSA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; GFX9-HSA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; GFX9-HSA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; GFX9-HSA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; GFX9-HSA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-HSA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; GFX9-HSA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; GFX9-HSA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; GFX9-HSA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; GFX9-HSA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; GFX9-HSA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX9-HSA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; GFX9-HSA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; GFX9-HSA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; GFX9-HSA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; GFX9-HSA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; GFX9-HSA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; GFX9-HSA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; GFX9-HSA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; GFX9-HSA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-HSA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; GFX9-HSA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; GFX9-HSA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; GFX9-HSA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; GFX9-HSA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; GFX9-HSA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9-HSA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR9]](s32), [[LSHR10]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR11]](s32), [[LSHR12]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR15]](s32), [[LSHR16]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR17]](s32), [[LSHR18]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR21]](s32), [[LSHR22]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR23]](s32), [[LSHR24]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR25]](s32), [[LSHR26]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR27]](s32), [[LSHR28]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR29]](s32), [[LSHR30]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; GFX9-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
-    ; GFX9-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
-    ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
-    ; GFX9-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
-    ; GFX9-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
-    ; GFX9-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
-    ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
-    ; GFX9-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
-    ; GFX9-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
-    ; GFX9-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
-    ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
-    ; GFX9-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
-    ; GFX9-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
-    ; GFX9-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
-    ; GFX9-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
-    ; GFX9-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
-    ; GFX9-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
-    ; GFX9-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; GFX9-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
-    ; GFX9-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; GFX9-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
-    ; GFX9-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; GFX9-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
-    ; GFX9-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
-    ; GFX9-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; GFX9-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
-    ; GFX9-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; GFX9-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
-    ; GFX9-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; GFX9-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR9]](s32), [[LSHR10]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR11]](s32), [[LSHR12]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR15]](s32), [[LSHR16]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR17]](s32), [[LSHR18]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR21]](s32), [[LSHR22]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR23]](s32), [[LSHR24]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR25]](s32), [[LSHR26]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR27]](s32), [[LSHR28]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR29]](s32), [[LSHR30]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32)
+    ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; GFX9-MESA-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32)
+    ; GFX9-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; GFX9-MESA-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32)
+    ; GFX9-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; GFX9-MESA-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32)
+    ; GFX9-MESA-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-MESA-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32)
+    ; GFX9-MESA-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; GFX9-MESA-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32)
+    ; GFX9-MESA-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; GFX9-MESA-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32)
+    ; GFX9-MESA-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX9-MESA-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32)
+    ; GFX9-MESA-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32)
+    ; GFX9-MESA-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; GFX9-MESA-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32)
+    ; GFX9-MESA-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; GFX9-MESA-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32)
+    ; GFX9-MESA-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; GFX9-MESA-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32)
+    ; GFX9-MESA-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-MESA-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32)
+    ; GFX9-MESA-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; GFX9-MESA-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32)
+    ; GFX9-MESA-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; GFX9-MESA-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32)
+    ; GFX9-MESA-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; GFX9-MESA-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32)
+    ; GFX9-MESA-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32)
+    ; GFX9-MESA-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; GFX9-MESA-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32)
+    ; GFX9-MESA-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; GFX9-MESA-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32)
+    ; GFX9-MESA-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; GFX9-MESA-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32)
+    ; GFX9-MESA-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-MESA-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32)
+    ; GFX9-MESA-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; GFX9-MESA-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32)
+    ; GFX9-MESA-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; GFX9-MESA-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32)
+    ; GFX9-MESA-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9-MESA-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[LSHR8]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR9]](s32), [[LSHR10]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR11]](s32), [[LSHR12]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR13]](s32), [[LSHR14]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR15]](s32), [[LSHR16]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR17]](s32), [[LSHR18]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR19]](s32), [[LSHR20]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR21]](s32), [[LSHR22]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR23]](s32), [[LSHR24]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR25]](s32), [[LSHR26]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR27]](s32), [[LSHR28]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR29]](s32), [[LSHR30]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<32 x s1>) = G_LOAD %0 :: (load (<32 x s1>), align 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -17744,132 +17744,132 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v8s4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+    ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     ; CI-HSA-LABEL: name: test_load_global_v8s4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
-    ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
+    ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     ; CI-MESA-LABEL: name: test_load_global_v8s4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
-    ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
+    ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     ; VI-LABEL: name: test_load_global_v8s4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-HSA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; GFX9-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-HSA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; GFX9-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-HSA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
+    ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
+    ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
-    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
-    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+    ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-MESA-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; GFX9-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32)
+    ; GFX9-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-MESA-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[LSHR2]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[LSHR4]](s32)
+    ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[LSHR6]](s32)
+    ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; GFX9-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1)
     $vgpr0 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 5b356ae4288c6..40c4047791328 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -16,52 +16,52 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_local_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s1_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-DS128: $vgpr0 = COPY [[AND]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_local_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_local_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-LABEL: name: test_load_local_s1_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX10: $vgpr0 = COPY [[AND]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 3)
     %2:_(s32) = G_ZEXT %1
@@ -76,52 +76,52 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_local_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s2_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI-DS128: $vgpr0 = COPY [[AND]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_local_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_local_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-LABEL: name: test_load_local_s2_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX10: $vgpr0 = COPY [[AND]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 3)
     %2:_(s32) = G_ZEXT %1
@@ -136,36 +136,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -180,36 +180,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s8_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s8_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -224,36 +224,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s16_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s16_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -268,36 +268,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -312,123 +312,123 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_local_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_local_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_local_s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -443,36 +443,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s32_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s32_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -486,99 +486,99 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: $vgpr0 = COPY [[OR]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-LABEL: name: test_load_local_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: $vgpr0 = COPY [[OR]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s32_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: $vgpr0 = COPY [[OR]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; VI-LABEL: name: test_load_local_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: $vgpr0 = COPY [[OR]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-LABEL: name: test_load_local_s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s32_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: $vgpr0 = COPY [[OR]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -592,197 +592,197 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s32_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s32_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -796,36 +796,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s24_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -840,36 +840,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_local_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_local_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -884,223 +884,223 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -1115,230 +1115,230 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -1353,36 +1353,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-LABEL: name: test_load_local_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s48_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_local_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_local_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-LABEL: name: test_load_local_s48_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s48_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 3)
     %2:_(s64) = G_ANYEXT %1
@@ -1397,36 +1397,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-LABEL: name: test_load_local_s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s64_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_local_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_local_s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-LABEL: name: test_load_local_s64_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1440,36 +1440,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-LABEL: name: test_load_local_s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s64_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; VI-LABEL: name: test_load_local_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-LABEL: name: test_load_local_s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-LABEL: name: test_load_local_s64_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1483,176 +1483,176 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_local_s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s64_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_local_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_local_s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-LABEL: name: test_load_local_s64_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1666,447 +1666,447 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_local_s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s64_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-DS128-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-DS128-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_local_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_local_s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; GFX10-LABEL: name: test_load_local_s64_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -2120,471 +2120,471 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-LABEL: name: test_load_local_s96_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2598,79 +2598,79 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s96_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-LABEL: name: test_load_local_s96_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2684,79 +2684,79 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-LABEL: name: test_load_local_s96_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2770,240 +2770,240 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-LABEL: name: test_load_local_s96_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -3017,471 +3017,471 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-LABEL: name: test_load_local_s96_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -3495,606 +3495,606 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s128_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_local_s128_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-DS128-LABEL: name: test_load_local_s128_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_local_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4108,52 +4108,52 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s128_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_local_s128_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-DS128-LABEL: name: test_load_local_s128_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_local_s128_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4167,102 +4167,102 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s128_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_local_s128_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-DS128-LABEL: name: test_load_local_s128_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_local_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4276,310 +4276,310 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s128_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_local_s128_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-DS128-LABEL: name: test_load_local_s128_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_local_s128_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4593,606 +4593,606 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s128_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_local_s128_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-DS128-LABEL: name: test_load_local_s128_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_local_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5206,36 +5206,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-LABEL: name: test_load_local_p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-DS128-LABEL: name: test_load_local_p1_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_local_p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_local_p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-LABEL: name: test_load_local_p1_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -5249,36 +5249,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-LABEL: name: test_load_local_p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; CI-DS128-LABEL: name: test_load_local_p1_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; VI-LABEL: name: test_load_local_p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-LABEL: name: test_load_local_p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-LABEL: name: test_load_local_p1_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -5292,176 +5292,176 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p1_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_local_p1_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-DS128-LABEL: name: test_load_local_p1_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_local_p1_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_local_p1_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-LABEL: name: test_load_local_p1_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -5475,447 +5475,447 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_local_p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-DS128-LABEL: name: test_load_local_p1_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-DS128-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-DS128-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_local_p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_local_p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-LABEL: name: test_load_local_p1_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -5929,36 +5929,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; CI-LABEL: name: test_load_local_p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; CI-DS128-LABEL: name: test_load_local_p3_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; VI-LABEL: name: test_load_local_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-LABEL: name: test_load_local_p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX10-LABEL: name: test_load_local_p3_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -5972,106 +5972,106 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p3_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-LABEL: name: test_load_local_p3_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-DS128-LABEL: name: test_load_local_p3_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; VI-LABEL: name: test_load_local_p3_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-LABEL: name: test_load_local_p3_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX10-LABEL: name: test_load_local_p3_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -6085,204 +6085,204 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-LABEL: name: test_load_local_p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-DS128-LABEL: name: test_load_local_p3_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; VI-LABEL: name: test_load_local_p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-LABEL: name: test_load_local_p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX10-LABEL: name: test_load_local_p3_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -6296,36 +6296,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p5_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-LABEL: name: test_load_local_p5_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-DS128-LABEL: name: test_load_local_p5_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; VI-LABEL: name: test_load_local_p5_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-LABEL: name: test_load_local_p5_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX10-LABEL: name: test_load_local_p5_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -6339,106 +6339,106 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p5_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-LABEL: name: test_load_local_p5_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-DS128-LABEL: name: test_load_local_p5_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_local_p5_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_local_p5_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX10-LABEL: name: test_load_local_p5_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -6452,204 +6452,204 @@ body: |
 
     ; SI-LABEL: name: test_load_local_p5_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-LABEL: name: test_load_local_p5_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-DS128-LABEL: name: test_load_local_p5_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_local_p5_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_local_p5_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX10-LABEL: name: test_load_local_p5_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -6663,127 +6663,127 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_local_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v2s8_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_local_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_local_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX10-LABEL: name: test_load_local_v2s8_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 3)
     %2:_(s16) = G_BITCAST %1
@@ -6799,67 +6799,67 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v2s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v2s8_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v2s8_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 3)
     %2:_(<2 x s32>) = G_ANYEXT %1
@@ -6874,226 +6874,226 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-LABEL: name: test_load_local_v3s8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 1, align 4)
     %2:_(s24) = G_BITCAST %1
@@ -7109,252 +7109,252 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
-    ; GFX9-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
-    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
-    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
-    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
-    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
-    ; GFX9-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR3]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; GFX9-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
     ; GFX10-LABEL: name: test_load_local_v3s8_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3)
     %2:_(s24) = G_BITCAST %1
@@ -7370,172 +7370,172 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v4s8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-LABEL: name: test_load_local_v4s8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 3)
     %2:_(s32) = G_BITCAST %1
@@ -7550,292 +7550,292 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v8s8_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v8s8_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-DS128: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-DS128: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-DS128: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v8s8_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 3)
     %2:_(<2 x s32>) = G_BITCAST %1
@@ -7850,652 +7850,652 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v16s8_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C5]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C5]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C5]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C5]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C5]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C5]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C5]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C5]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C5]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C5]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C5]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C5]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C5]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C5]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C5]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C5]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C5]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C5]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C5]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C5]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C5]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C5]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C5]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C5]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C5]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C5]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C5]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C5]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C5]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C5]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C5]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C5]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C5]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C5]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C5]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C5]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C5]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C5]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v16s8_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; GFX9-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
-    ; GFX9-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]]
+    ; GFX9-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v16s8_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3)
     %2:_(<4 x s32>) = G_BITCAST %1
@@ -8510,36 +8510,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-LABEL: name: test_load_local_v2s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; VI-LABEL: name: test_load_local_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_local_v2s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX10-LABEL: name: test_load_local_v2s16_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -8553,88 +8553,88 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-LABEL: name: test_load_local_v2s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_local_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_local_v2s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX10-LABEL: name: test_load_local_v2s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -8648,229 +8648,229 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-LABEL: name: test_load_local_v2s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v2s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_local_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_local_v2s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX10-LABEL: name: test_load_local_v2s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -8884,200 +8884,200 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-DS128: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -9093,216 +9093,216 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -9318,423 +9318,423 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-DS128-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -9749,36 +9749,36 @@ body: |
     liveins: $vgpr0
     ; SI-LABEL: name: test_load_local_v4s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v4s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v4s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-LABEL: name: test_load_local_v4s16_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -9792,36 +9792,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v4s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v4s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-LABEL: name: test_load_local_v4s16_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -9834,160 +9834,160 @@ body: |
     liveins: $vgpr0
     ; SI-LABEL: name: test_load_local_v4s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v4s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v4s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-LABEL: name: test_load_local_v4s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10001,418 +10001,418 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v4s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v4s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-DS128-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-DS128-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-DS128-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v4s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-LABEL: name: test_load_local_v4s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX10-UNALIGNED: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10426,36 +10426,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v2s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v2s32_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v2s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v2s32_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10469,36 +10469,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v2s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v2s32_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v2s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v2s32_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10512,169 +10512,169 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v2s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v2s32_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v2s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v2s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v2s32_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10688,337 +10688,337 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v2s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v2s32_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v2s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v2s32_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -11032,463 +11032,463 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_local_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v3s32_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_local_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_local_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX10-LABEL: name: test_load_local_v3s32_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -11502,71 +11502,71 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_local_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v3s32_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_local_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_local_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX10-LABEL: name: test_load_local_v3s32_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -11580,44 +11580,44 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v4s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v4s32_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11631,44 +11631,44 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v4s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v4s32_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11682,94 +11682,94 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v4s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v4s32_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11783,302 +11783,302 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v4s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v4s32_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v4s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12092,598 +12092,598 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v4s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v4s32_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v4s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12697,80 +12697,80 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v8s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; CI-LABEL: name: test_load_local_v8s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v8s32_align32
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; VI-LABEL: name: test_load_local_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_local_v8s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX10-LABEL: name: test_load_local_v8s32_align32
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s32_align32
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -12784,140 +12784,140 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v16s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; CI-LABEL: name: test_load_local_v16s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 32, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 16, align 16, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (<2 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (<2 x s32>) from unknown-address + 40, addrspace 3)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (<2 x s32>) from unknown-address + 48, align 16, addrspace 3)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v16s32_align32
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; VI-LABEL: name: test_load_local_v16s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; GFX9-LABEL: name: test_load_local_v16s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; GFX9-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; GFX10-LABEL: name: test_load_local_v16s32_align32
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s32_align32
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -12931,64 +12931,64 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-LABEL: name: test_load_local_v2s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-DS128-LABEL: name: test_load_local_v2s64_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_local_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_local_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX10-LABEL: name: test_load_local_v2s64_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -13002,816 +13002,816 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s64_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
-    ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
+    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
+    ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-LABEL: name: test_load_local_v2s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
-    ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
-    ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
-    ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
-    ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
+    ; CI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
+    ; CI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
+    ; CI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
+    ; CI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-DS128-LABEL: name: test_load_local_v2s64_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
-    ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
-    ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
-    ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-DS128: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
-    ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-DS128: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
-    ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
-    ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; CI-DS128: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY3]](s32)
+    ; CI-DS128-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
+    ; CI-DS128-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CI-DS128-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-DS128-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY5]](s32)
+    ; CI-DS128-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-DS128-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY6]](s32)
+    ; CI-DS128-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-DS128-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY7]](s32)
+    ; CI-DS128-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-DS128-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; CI-DS128-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C9]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY8]](s32)
+    ; CI-DS128-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]]
+    ; CI-DS128-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-DS128-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; CI-DS128-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CI-DS128-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_local_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; VI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; VI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; VI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_local_v2s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX9-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     ; GFX10-LABEL: name: test_load_local_v2s64_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX10: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX10: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX10: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX10: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX10: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX10: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX10: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX10: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX10: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX10: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX10: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX10: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX10: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX10: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX10: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX10: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX10-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX10-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX10-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
-    ; GFX10-UNALIGNED: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; GFX10-UNALIGNED: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -13825,96 +13825,96 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-LABEL: name: test_load_local_v3s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-DS128-LABEL: name: test_load_local_v3s64_align32
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_local_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_local_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX10-LABEL: name: test_load_local_v3s64_align32
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -13930,80 +13930,80 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v4s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; CI-LABEL: name: test_load_local_v4s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; CI-DS128-LABEL: name: test_load_local_v4s64_align32
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; VI-LABEL: name: test_load_local_v4s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_local_v4s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX10-LABEL: name: test_load_local_v4s64_align32
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s64_align32
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -14017,102 +14017,102 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-LABEL: name: test_load_local_v2p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-DS128-LABEL: name: test_load_local_v2p1_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_local_v2p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_local_v2p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX10-LABEL: name: test_load_local_v2p1_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p1_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14126,36 +14126,36 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2p3_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-LABEL: name: test_load_local_v2p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; CI-DS128-LABEL: name: test_load_local_v2p3_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; VI-LABEL: name: test_load_local_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_local_v2p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX10-LABEL: name: test_load_local_v2p3_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14169,36 +14169,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s32_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_extload_local_s32_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_extload_local_s32_from_1_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_extload_local_s32_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_extload_local_s32_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_extload_local_s32_from_1_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -14212,36 +14212,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s32_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_extload_local_s32_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-DS128-LABEL: name: test_extload_local_s32_from_2_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_extload_local_s32_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_extload_local_s32_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-LABEL: name: test_extload_local_s32_from_2_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -14256,44 +14256,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-LABEL: name: test_extload_local_s64_from_1_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14307,44 +14307,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-LABEL: name: test_extload_local_s64_from_2_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14358,44 +14358,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-LABEL: name: test_extload_local_s64_from_4_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14409,68 +14409,68 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-LABEL: name: test_extload_local_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI-DS128: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI-DS128: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-DS128-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_extload_local_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX10-LABEL: name: test_extload_local_s128_from_4_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX10: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX10: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14484,44 +14484,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-LABEL: name: test_extload_local_s64_from_2_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14535,44 +14535,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-LABEL: name: test_extload_local_s64_from_1_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14586,36 +14586,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14629,36 +14629,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14672,36 +14672,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -14715,36 +14715,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-DS128-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX10-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -14758,36 +14758,36 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -14801,898 +14801,898 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; SI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; SI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_local_v2s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_load_local_v2s96_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI-DS128: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; CI-DS128: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; CI-DS128: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; CI-DS128: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; CI-DS128: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; CI-DS128: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; CI-DS128: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; CI-DS128: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; CI-DS128: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; CI-DS128: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; CI-DS128: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; CI-DS128: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-DS128: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; CI-DS128: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; CI-DS128: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-DS128: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; CI-DS128: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; CI-DS128: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-DS128: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; CI-DS128: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; CI-DS128: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; CI-DS128: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; CI-DS128: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; CI-DS128: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; CI-DS128: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; CI-DS128: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; CI-DS128: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; CI-DS128: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; CI-DS128: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-DS128: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; CI-DS128: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; CI-DS128: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-DS128: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; CI-DS128: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; CI-DS128: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-DS128-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-DS128-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-DS128-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-DS128-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; CI-DS128-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; CI-DS128-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; CI-DS128-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-DS128-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-DS128-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-DS128-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; CI-DS128-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; CI-DS128-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-DS128-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-DS128-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; CI-DS128-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; CI-DS128-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_local_v2s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_local_v2s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-LABEL: name: test_load_local_v2s96_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; GFX10: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; GFX10: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; GFX10: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; GFX10: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; GFX10: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; GFX10: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; GFX10: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; GFX10: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX10: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX10: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX10: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX10: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX10: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX10: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX10: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX10: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX10: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX10: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; GFX10: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; GFX10: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; GFX10: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; GFX10: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; GFX10: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; GFX10: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; GFX10: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; GFX10: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX10: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX10: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX10: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX10: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX10: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX10: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX10: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX10: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX10: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; GFX10-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; GFX10-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX10-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX10-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX10-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX10-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX10-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; GFX10-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX10-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX10-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX10-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX10-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX10-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX10-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX10-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX10-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX10-UNALIGNED: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; GFX10-UNALIGNED: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX10-UNALIGNED: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX10-UNALIGNED: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX10-UNALIGNED: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX10-UNALIGNED: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX10-UNALIGNED: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX10-UNALIGNED: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX10-UNALIGNED: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX10-UNALIGNED: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX10-UNALIGNED-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX10-UNALIGNED-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX10-UNALIGNED-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX10-UNALIGNED-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -15709,457 +15709,457 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_local_v2s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_load_local_v2s96_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-DS128-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; CI-DS128-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-DS128-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-DS128-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; CI-DS128-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; CI-DS128-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; CI-DS128-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-DS128-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; CI-DS128-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; CI-DS128-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; CI-DS128-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_local_v2s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_local_v2s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-LABEL: name: test_load_local_v2s96_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16176,163 +16176,163 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_local_v2s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_load_local_v2s96_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_local_v2s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_local_v2s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-LABEL: name: test_load_local_v2s96_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16349,143 +16349,143 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v2s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_local_v2s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
-    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 16, addrspace 3)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_load_local_v2s96_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_local_v2s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_local_v2s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-LABEL: name: test_load_local_v2s96_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
-    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
index 05cf3e08c56b3..96c7a2fa38613 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
@@ -41,10 +41,10 @@ body: |
     liveins: $vgpr0_vgpr1
     ; SI-LABEL: name: widen_load_range0_tbaa
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !0, !tbaa !1)
     %2:_(s32) = G_ZEXT %1
@@ -60,8 +60,8 @@ body: |
     liveins: $vgpr0_vgpr1
     ; SI-LABEL: name: widen_load_range1_tbaa
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !0, !tbaa !1)
     $vgpr0 = COPY %1
@@ -74,10 +74,10 @@ body: |
     liveins: $vgpr0_vgpr1
     ; SI-LABEL: name: widen_load_tbaa0
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1)
     %2:_(s32) = G_ZEXT %1
@@ -93,8 +93,8 @@ body: |
     liveins: $vgpr0_vgpr1
     ; SI-LABEL: name: widen_load_tbaa1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1)
     $vgpr0 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 8139c1e10ca60..f0bacb3ba7113 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -13,28 +13,28 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_private_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_private_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_private_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 5)
     %2:_(s32) = G_ZEXT %1
@@ -49,28 +49,28 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; SI: $vgpr0 = COPY [[AND]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_private_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; CI: $vgpr0 = COPY [[AND]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_private_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; VI: $vgpr0 = COPY [[AND]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_private_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
-    ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 5)
     %2:_(s32) = G_ZEXT %1
@@ -85,20 +85,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -113,20 +113,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -141,20 +141,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -169,20 +169,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -197,70 +197,70 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_private_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_private_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_private_s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -275,20 +275,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -302,56 +302,56 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: $vgpr0 = COPY [[OR]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; CI-LABEL: name: test_load_private_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: $vgpr0 = COPY [[OR]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; VI-LABEL: name: test_load_private_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: $vgpr0 = COPY [[OR]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX9-LABEL: name: test_load_private_s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 5)
     $vgpr0 = COPY %1
@@ -365,112 +365,112 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 5)
     $vgpr0 = COPY %1
@@ -484,20 +484,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s24_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -512,20 +512,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_load_private_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_load_private_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -540,124 +540,124 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -672,128 +672,128 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -808,80 +808,80 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_private_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_private_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_private_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 5)
     %2:_(s64) = G_ANYEXT %1
@@ -896,80 +896,80 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_private_s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_private_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_private_s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s48), align 8, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -983,36 +983,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_private_s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_private_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_private_s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -1026,96 +1026,96 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_private_s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_private_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_private_s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -1129,192 +1129,192 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; CI-LABEL: name: test_load_private_s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; VI-LABEL: name: test_load_private_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; GFX9-LABEL: name: test_load_private_s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -1328,268 +1328,268 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_private_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_private_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_private_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1603,52 +1603,52 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s96_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_private_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_private_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_private_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1662,52 +1662,52 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_private_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_private_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_private_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1721,136 +1721,136 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_private_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_private_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_private_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1864,268 +1864,268 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_private_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_private_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_private_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2139,344 +2139,344 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s128_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_private_s128_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_private_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_private_s128_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2490,64 +2490,64 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s128_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_private_s128_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_private_s128_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_private_s128_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2561,64 +2561,64 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s128_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_private_s128_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_private_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_private_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2632,172 +2632,172 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s128_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_private_s128_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_private_s128_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_private_s128_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -2811,344 +2811,344 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s128_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; CI-LABEL: name: test_load_private_s128_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; VI-LABEL: name: test_load_private_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_private_s128_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3162,36 +3162,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_private_p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_private_p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_private_p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p5) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -3205,36 +3205,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_private_p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_private_p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_private_p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p5) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -3248,96 +3248,96 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p1_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_private_p1_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_private_p1_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_private_p1_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p5) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -3351,192 +3351,192 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; CI-LABEL: name: test_load_private_p1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; VI-LABEL: name: test_load_private_p1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     ; GFX9-LABEL: name: test_load_private_p1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(p5) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -3550,20 +3550,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; CI-LABEL: name: test_load_private_p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; VI-LABEL: name: test_load_private_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     ; GFX9-LABEL: name: test_load_private_p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p5) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -3577,60 +3577,60 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p3_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-LABEL: name: test_load_private_p3_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; VI-LABEL: name: test_load_private_p3_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-LABEL: name: test_load_private_p3_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     %0:_(p5) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 5)
     $vgpr0 = COPY %1
@@ -3644,116 +3644,116 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; CI-LABEL: name: test_load_private_p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; VI-LABEL: name: test_load_private_p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX9-LABEL: name: test_load_private_p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     %0:_(p5) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 5)
     $vgpr0 = COPY %1
@@ -3767,20 +3767,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p5_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; CI-LABEL: name: test_load_private_p5_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; VI-LABEL: name: test_load_private_p5_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     ; GFX9-LABEL: name: test_load_private_p5_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p5) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -3794,60 +3794,60 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p5_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-LABEL: name: test_load_private_p5_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_private_p5_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_private_p5_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p5) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 5)
     $vgpr0 = COPY %1
@@ -3861,116 +3861,116 @@ body: |
 
     ; SI-LABEL: name: test_load_private_p5_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; CI-LABEL: name: test_load_private_p5_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; VI-LABEL: name: test_load_private_p5_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX9-LABEL: name: test_load_private_p5_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     %0:_(p5) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 5)
     $vgpr0 = COPY %1
@@ -3984,66 +3984,66 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_private_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_private_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_private_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 5)
     %2:_(s16) = G_BITCAST %1
@@ -4059,36 +4059,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v2s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 5)
     %2:_(<2 x s32>) = G_ANYEXT %1
@@ -4103,116 +4103,116 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 5, align 4)
     %2:_(s24) = G_BITCAST %1
@@ -4228,128 +4228,128 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5)
     %2:_(s24) = G_BITCAST %1
@@ -4365,88 +4365,88 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: $vgpr0 = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: $vgpr0 = COPY [[OR2]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: $vgpr0 = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 5)
     %2:_(s32) = G_BITCAST %1
@@ -4461,156 +4461,156 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v8s8_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; CI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 5)
     %2:_(<2 x s32>) = G_BITCAST %1
@@ -4625,340 +4625,340 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v16s8_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C6]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C6]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C6]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C6]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C6]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C6]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C6]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C6]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C6]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 5)
     %2:_(<4 x s32>) = G_BITCAST %1
@@ -4973,20 +4973,20 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; CI-LABEL: name: test_load_private_v2s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; VI-LABEL: name: test_load_private_v2s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_private_v2s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -5000,54 +5000,54 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-LABEL: name: test_load_private_v2s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_private_v2s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_private_v2s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5)
     $vgpr0 = COPY %1
@@ -5061,133 +5061,133 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; CI-LABEL: name: test_load_private_v2s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; VI-LABEL: name: test_load_private_v2s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     ; GFX9-LABEL: name: test_load_private_v2s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5)
     $vgpr0 = COPY %1
@@ -5201,119 +5201,119 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD1]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD1]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -5329,123 +5329,123 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -5461,240 +5461,240 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C7]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
-    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]]
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C6]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -5709,36 +5709,36 @@ body: |
     liveins: $vgpr0
     ; SI-LABEL: name: test_load_private_v4s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v4s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v4s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -5752,36 +5752,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v4s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v4s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -5794,94 +5794,94 @@ body: |
     liveins: $vgpr0
     ; SI-LABEL: name: test_load_private_v4s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v4s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v4s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -5895,243 +5895,243 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v4s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY1]](s32)
+    ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
+    ; CI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY3]](s32)
+    ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v4s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
-    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
-    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
-    ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -6145,36 +6145,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v2s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v2s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -6188,36 +6188,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v2s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v2s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -6231,96 +6231,96 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v2s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v2s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v2s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -6334,192 +6334,192 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v2s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v2s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -6533,264 +6533,264 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_private_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_private_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_private_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -6804,48 +6804,48 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_private_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_private_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_private_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -6859,340 +6859,340 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v4s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v4s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7206,60 +7206,60 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v4s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v4s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7273,60 +7273,60 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v4s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7340,168 +7340,168 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v4s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v4s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v4s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7515,340 +7515,340 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v4s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v4s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v4s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -7862,108 +7862,108 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v8s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; CI-LABEL: name: test_load_private_v8s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; VI-LABEL: name: test_load_private_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_private_v8s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -7977,204 +7977,204 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v16s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
-    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
-    ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
-    ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
-    ; SI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
-    ; SI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
-    ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
+    ; SI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
+    ; SI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
+    ; SI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
+    ; SI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     ; CI-LABEL: name: test_load_private_v16s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
-    ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
-    ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
-    ; CI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
-    ; CI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
-    ; CI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
-    ; CI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
+    ; CI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
+    ; CI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
+    ; CI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
+    ; CI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
+    ; CI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
+    ; CI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     ; VI-LABEL: name: test_load_private_v16s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
-    ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
-    ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
-    ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
-    ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
-    ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
-    ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
+    ; VI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
+    ; VI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
+    ; VI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
+    ; VI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     ; GFX9-LABEL: name: test_load_private_v16s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
-    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
-    ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
-    ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
-    ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
-    ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
-    ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5)
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5)
+    ; GFX9-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5)
+    ; GFX9-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5)
+    ; GFX9-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5)
+    ; GFX9-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -8188,64 +8188,64 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-LABEL: name: test_load_private_v2s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_private_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_private_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8259,344 +8259,344 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s64_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; CI-LABEL: name: test_load_private_v2s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; VI-LABEL: name: test_load_private_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_private_v2s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8610,96 +8610,96 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v3s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-LABEL: name: test_load_private_v3s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_private_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_private_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -8715,112 +8715,112 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; CI-LABEL: name: test_load_private_v4s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; VI-LABEL: name: test_load_private_v4s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_private_v4s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -8834,64 +8834,64 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2p1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; CI-LABEL: name: test_load_private_v2p1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; VI-LABEL: name: test_load_private_v2p1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_private_v2p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8905,112 +8905,112 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v4p1_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; CI-LABEL: name: test_load_private_v4p1_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; VI-LABEL: name: test_load_private_v4p1_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     ; GFX9-LABEL: name: test_load_private_v4p1_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -9024,36 +9024,36 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2p3_align8
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
-    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; CI-LABEL: name: test_load_private_v2p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
-    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; VI-LABEL: name: test_load_private_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
-    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     ; GFX9-LABEL: name: test_load_private_v2p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9067,20 +9067,20 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s32_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_ext_load_private_s32_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_private_s32_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -9094,20 +9094,20 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s32_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; CI-LABEL: name: test_ext_load_private_s32_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; VI-LABEL: name: test_ext_load_private_s32_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; VI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     ; GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 5)
     $vgpr0 = COPY %1
@@ -9122,24 +9122,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9153,24 +9153,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9184,24 +9184,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9215,36 +9215,36 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9258,24 +9258,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9289,24 +9289,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5)
+    ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9320,20 +9320,20 @@ body: |
 
     ; SI-LABEL: name: test_extload_private_v2s32_from_4_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9347,20 +9347,20 @@ body: |
 
     ; SI-LABEL: name: test_extload_private_v2s32_from_4_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9374,20 +9374,20 @@ body: |
 
     ; SI-LABEL: name: test_extload_private_v2s32_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; CI-LABEL: name: test_extload_private_v2s32_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; VI-LABEL: name: test_extload_private_v2s32_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9401,20 +9401,20 @@ body: |
 
     ; SI-LABEL: name: test_extload_private_v3s32_from_6_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; CI-LABEL: name: test_extload_private_v3s32_from_6_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; VI-LABEL: name: test_extload_private_v3s32_from_6_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     ; GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -9428,20 +9428,20 @@ body: |
 
     ; SI-LABEL: name: test_extload_private_v4s32_from_8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; CI-LABEL: name: test_extload_private_v4s32_from_8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; VI-LABEL: name: test_extload_private_v4s32_from_8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9455,508 +9455,508 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; SI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; SI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; SI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; SI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
+    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; SI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
+    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; SI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_private_v2s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
-    ; CI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
-    ; CI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
-    ; CI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
-    ; CI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
-    ; CI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
-    ; CI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; CI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; CI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; CI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; CI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; CI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; CI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; CI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; CI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; CI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; CI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; CI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; CI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
+    ; CI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; CI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; CI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; CI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; CI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; CI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; CI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
+    ; CI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; CI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; CI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; CI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; CI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; CI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; CI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_private_v2s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; VI-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; VI-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; VI-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; VI-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
+    ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; VI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; VI-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
+    ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; VI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; VI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_private_v2s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
-    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
-    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
-    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
-    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
-    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
-    ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
-    ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
-    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
-    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
-    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
-    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
-    ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
-    ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
-    ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
-    ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
-    ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
-    ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
-    ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
-    ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
-    ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
-    ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
-    ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
-    ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
-    ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
-    ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
-    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
-    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
-    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
-    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C3]]
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C3]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C3]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C3]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C3]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C3]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C3]]
+    ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C3]]
+    ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C3]]
+    ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32)
+    ; GFX9-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD12]], [[C3]]
+    ; GFX9-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LOAD13]], [[C3]]
+    ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX9-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LOAD14]], [[C3]]
+    ; GFX9-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX9-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LOAD15]], [[C3]]
+    ; GFX9-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
+    ; GFX9-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5)
+    ; GFX9-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LOAD16]], [[C3]]
+    ; GFX9-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LOAD17]], [[C3]]
+    ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
+    ; GFX9-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LOAD18]], [[C3]]
+    ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; GFX9-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LOAD19]], [[C3]]
+    ; GFX9-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; GFX9-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5)
+    ; GFX9-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LOAD20]], [[C3]]
+    ; GFX9-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LOAD21]], [[C3]]
+    ; GFX9-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; GFX9-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
+    ; GFX9-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LOAD22]], [[C3]]
+    ; GFX9-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
+    ; GFX9-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LOAD23]], [[C3]]
+    ; GFX9-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
+    ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -9973,256 +9973,256 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; SI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; SI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; SI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; SI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
+    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_private_v2s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
-    ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
-    ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; CI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; CI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; CI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
+    ; CI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; CI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; CI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; CI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
+    ; CI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; CI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_private_v2s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; VI-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; VI-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
+    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; VI-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; VI-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
+    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_private_v2s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
-    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
-    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
-    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
-    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
-    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
-    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
-    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
-    ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
-    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
-    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
-    ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
-    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
-    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5)
+    ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C1]]
+    ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C1]]
+    ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5)
+    ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C1]]
+    ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C1]]
+    ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; GFX9-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5)
+    ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C1]]
+    ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LOAD7]], [[C1]]
+    ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; GFX9-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
+    ; GFX9-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5)
+    ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD8]], [[C1]]
+    ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LOAD9]], [[C1]]
+    ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; GFX9-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
+    ; GFX9-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5)
+    ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LOAD10]], [[C1]]
+    ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LOAD11]], [[C1]]
+    ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -10239,100 +10239,100 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_private_v2s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_private_v2s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_private_v2s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -10349,100 +10349,100 @@ body: |
 
     ; SI-LABEL: name: test_load_private_v2s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_load_private_v2s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; CI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_load_private_v2s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_load_private_v2s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
-    ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
-    ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
-    ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
-    ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; GFX9-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5)
+    ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5)
     %2:_(s96) = G_EXTRACT %1, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
index be57535fbffe4..ad0caaaa6114b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
@@ -9,14 +9,14 @@ body:             |
 
     ; CHECK-LABEL: name: memcpy_test
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
-    ; CHECK: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
-    ; CHECK: S_ENDPGM 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
index cab18296cafda..e649761df9778 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
@@ -9,14 +9,14 @@ body:             |
 
     ; CHECK-LABEL: name: memcpyinline_test
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
-    ; CHECK: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
-    ; CHECK: S_ENDPGM 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
index 210ebab122292..14164cf6fc4c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
@@ -9,14 +9,14 @@ body:             |
 
     ; CHECK-LABEL: name: memmove_test
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
-    ; CHECK: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
-    ; CHECK: S_ENDPGM 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
index 36815c34407b6..5a67aac457672 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
@@ -9,13 +9,13 @@ body:             |
 
     ; CHECK-LABEL: name: memset_test
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
-    ; CHECK: G_STORE [[COPY2]](s32), [[MV]](p0) :: (store (s8))
-    ; CHECK: S_ENDPGM 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir
index ae0573ab8272b..123454a26af2b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values-build-vector.mir
@@ -7,9 +7,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s32_s32_s64
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s64) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
@@ -22,9 +22,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s32_s32_v2s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(<2 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32)
@@ -37,10 +37,10 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s32_s32_s32_v3s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_CONSTANT i32 2
@@ -54,9 +54,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s64_s64_s128
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64)
@@ -69,11 +69,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s64_s64_s64_s64_v4s64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64), [[C3]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64), [[C3]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = G_CONSTANT i64 1
     %2:_(s64) = G_CONSTANT i64 2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
index c2c4edd1b7603..6a780fb948b66 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
@@ -14,54 +14,54 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; CHECK-LABEL: name: test_merge_p1_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY8]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY9]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY10]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
-    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY8]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY9]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY10]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -89,16 +89,16 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s16_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s16) = G_MERGE_VALUES %0, %1
@@ -112,29 +112,29 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s24_s8_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C1]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CHECK: $vgpr0 = COPY [[OR2]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C1]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -149,23 +149,23 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s32_s8_s8_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CHECK: $vgpr0 = COPY [[OR2]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -181,9 +181,9 @@ body: |
     liveins: $vgpr0, $vgpr1
     ; CHECK-LABEL: name: test_merge_s64_s32_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-    ; CHECK: $vgpr1_vgpr2 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s64) = G_MERGE_VALUES %0, %1
@@ -197,21 +197,21 @@ body: |
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: test_merge_s64_s16_s16_s16_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CHECK: $vgpr1_vgpr2 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[MV]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -231,55 +231,55 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s24_s4_s4_s4_s4_s4_s4
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; CHECK: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]]
-    ; CHECK: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C9]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C10]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32)
-    ; CHECK: S_NOP 0, implicit [[TRUNC6]](s24)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]]
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C9]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C10]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s24)
     %0:_(s4) = G_CONSTANT i4 0
     %1:_(s4) = G_CONSTANT i4 1
     %2:_(s4) = G_CONSTANT i4 2
@@ -296,56 +296,56 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s28_s4_s4_s4_s4_s4_s4_s4
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; CHECK: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]]
-    ; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C11]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C12]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32)
-    ; CHECK: S_NOP 0, implicit [[TRUNC6]](s28)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]]
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C11]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C12]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s28)
     %0:_(s4) = G_CONSTANT i4 0
     %1:_(s4) = G_CONSTANT i4 1
     %2:_(s4) = G_CONSTANT i4 2
@@ -363,42 +363,42 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s32_s4_s4_s4_s4_s4_s4_s4_s4
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
-    ; CHECK: S_NOP 0, implicit [[OR6]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32)
     %0:_(s4) = G_CONSTANT i4 0
     %1:_(s4) = G_CONSTANT i4 1
     %2:_(s4) = G_CONSTANT i4 2
@@ -417,49 +417,49 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s64_s8_s8_s8_s8_s8_s8_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]]
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
-    ; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]]
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -478,71 +478,71 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s96_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; CHECK: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
-    ; CHECK: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
-    ; CHECK: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]]
-    ; CHECK: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10
-    ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]]
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
-    ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32)
-    ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]]
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]]
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]]
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]]
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
+    ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -566,20 +566,20 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s96_s16_s16_s16_s16_s16_s16
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL1]]
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL2]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL1]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL2]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s16) = G_CONSTANT i16 0
     %1:_(s16) = G_CONSTANT i16 1
     %2:_(s16) = G_CONSTANT i16 2
@@ -596,51 +596,51 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s56_s8_s8_s8_s8_s8_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]]
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
-    ; CHECK: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
-    ; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C3]]
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C6]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C5]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
-    ; CHECK: S_NOP 0, implicit [[TRUNC4]](s56)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C3]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C6]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C5]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC4]](s56)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -658,322 +658,322 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s68_s17_s17_s17_s17
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32)
-    ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-    ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32)
-    ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32)
-    ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32)
-    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-    ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32)
-    ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32)
-    ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32)
-    ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32)
-    ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
-    ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32)
-    ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
-    ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32)
-    ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
-    ; CHECK: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32)
-    ; CHECK: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
-    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32)
-    ; CHECK: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
-    ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32)
-    ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
-    ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32)
-    ; CHECK: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
-    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-    ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32)
-    ; CHECK: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32)
-    ; CHECK: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32)
-    ; CHECK: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
-    ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
-    ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32)
-    ; CHECK: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
-    ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
-    ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32)
-    ; CHECK: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
-    ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32)
-    ; CHECK: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
-    ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32)
-    ; CHECK: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
-    ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
-    ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32)
-    ; CHECK: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
-    ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32)
-    ; CHECK: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
-    ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32)
-    ; CHECK: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]]
-    ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32)
-    ; CHECK: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]]
-    ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32)
-    ; CHECK: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]]
-    ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32)
-    ; CHECK: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]]
-    ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32)
-    ; CHECK: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]]
-    ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32)
-    ; CHECK: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]]
-    ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32)
-    ; CHECK: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]]
-    ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32)
-    ; CHECK: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]]
-    ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32)
-    ; CHECK: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]]
-    ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32)
-    ; CHECK: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]]
-    ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32)
-    ; CHECK: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]]
-    ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32)
-    ; CHECK: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]]
-    ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32)
-    ; CHECK: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]]
-    ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32)
-    ; CHECK: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]]
-    ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32)
-    ; CHECK: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]]
-    ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32)
-    ; CHECK: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]]
-    ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32)
-    ; CHECK: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]]
-    ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32)
-    ; CHECK: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]]
-    ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32)
-    ; CHECK: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]]
-    ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32)
-    ; CHECK: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]]
-    ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32)
-    ; CHECK: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]]
-    ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32)
-    ; CHECK: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]]
-    ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32)
-    ; CHECK: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]]
-    ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32)
-    ; CHECK: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]]
-    ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32)
-    ; CHECK: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]]
-    ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32)
-    ; CHECK: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]]
-    ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32)
-    ; CHECK: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]]
-    ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32)
-    ; CHECK: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]]
-    ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32)
-    ; CHECK: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]]
-    ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32)
-    ; CHECK: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]]
-    ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32)
-    ; CHECK: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]]
-    ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32)
-    ; CHECK: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]]
-    ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32)
-    ; CHECK: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]]
-    ; CHECK: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32)
-    ; CHECK: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]]
-    ; CHECK: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32)
-    ; CHECK: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]]
-    ; CHECK: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32)
-    ; CHECK: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]]
-    ; CHECK: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32)
-    ; CHECK: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]]
-    ; CHECK: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32)
-    ; CHECK: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]]
-    ; CHECK: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32)
-    ; CHECK: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]]
-    ; CHECK: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32)
-    ; CHECK: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]]
-    ; CHECK: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32)
-    ; CHECK: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]]
-    ; CHECK: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32)
-    ; CHECK: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]]
-    ; CHECK: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32)
-    ; CHECK: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]]
-    ; CHECK: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32)
-    ; CHECK: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]]
-    ; CHECK: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32)
-    ; CHECK: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]]
-    ; CHECK: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32)
-    ; CHECK: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]]
-    ; CHECK: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32)
-    ; CHECK: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]]
-    ; CHECK: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32)
-    ; CHECK: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]]
-    ; CHECK: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32)
-    ; CHECK: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]]
-    ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32)
-    ; CHECK: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]]
-    ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32)
-    ; CHECK: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]]
-    ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32)
-    ; CHECK: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]]
-    ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32)
-    ; CHECK: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]]
-    ; CHECK: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32)
-    ; CHECK: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]]
-    ; CHECK: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32)
-    ; CHECK: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]]
-    ; CHECK: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32)
-    ; CHECK: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]]
-    ; CHECK: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32)
-    ; CHECK: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]]
-    ; CHECK: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32)
-    ; CHECK: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]]
-    ; CHECK: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32)
-    ; CHECK: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]]
-    ; CHECK: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32)
-    ; CHECK: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]]
-    ; CHECK: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32)
-    ; CHECK: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]]
-    ; CHECK: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32)
-    ; CHECK: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96)
-    ; CHECK: S_NOP 0, implicit [[TRUNC]](s68)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+    ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32)
+    ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32)
+    ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32)
+    ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32)
+    ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+    ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32)
+    ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+    ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32)
+    ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32)
+    ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+    ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32)
+    ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+    ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32)
+    ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
+    ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+    ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32)
+    ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+    ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32)
+    ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32)
+    ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
+    ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+    ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32)
+    ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
+    ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+    ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32)
+    ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
+    ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32)
+    ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
+    ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+    ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32)
+    ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
+    ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32)
+    ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
+    ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32)
+    ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
+    ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]]
+    ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]]
+    ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]]
+    ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]]
+    ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32)
+    ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]]
+    ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32)
+    ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]]
+    ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32)
+    ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]]
+    ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32)
+    ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]]
+    ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32)
+    ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]]
+    ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]]
+    ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32)
+    ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]]
+    ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32)
+    ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]]
+    ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32)
+    ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]]
+    ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]]
+    ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32)
+    ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]]
+    ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32)
+    ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]]
+    ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32)
+    ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]]
+    ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32)
+    ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]]
+    ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32)
+    ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]]
+    ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32)
+    ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]]
+    ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32)
+    ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]]
+    ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32)
+    ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]]
+    ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32)
+    ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]]
+    ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32)
+    ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]]
+    ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32)
+    ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]]
+    ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32)
+    ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]]
+    ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32)
+    ; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]]
+    ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32)
+    ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]]
+    ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32)
+    ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]]
+    ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32)
+    ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]]
+    ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32)
+    ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]]
+    ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]]
+    ; CHECK-NEXT: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]]
+    ; CHECK-NEXT: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32)
+    ; CHECK-NEXT: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]]
+    ; CHECK-NEXT: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32)
+    ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]]
+    ; CHECK-NEXT: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32)
+    ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]]
+    ; CHECK-NEXT: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32)
+    ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]]
+    ; CHECK-NEXT: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32)
+    ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]]
+    ; CHECK-NEXT: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32)
+    ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]]
+    ; CHECK-NEXT: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32)
+    ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]]
+    ; CHECK-NEXT: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32)
+    ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]]
+    ; CHECK-NEXT: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32)
+    ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]]
+    ; CHECK-NEXT: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32)
+    ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]]
+    ; CHECK-NEXT: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32)
+    ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]]
+    ; CHECK-NEXT: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32)
+    ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]]
+    ; CHECK-NEXT: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32)
+    ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]]
+    ; CHECK-NEXT: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32)
+    ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]]
+    ; CHECK-NEXT: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32)
+    ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]]
+    ; CHECK-NEXT: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32)
+    ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]]
+    ; CHECK-NEXT: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32)
+    ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]]
+    ; CHECK-NEXT: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32)
+    ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]]
+    ; CHECK-NEXT: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32)
+    ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]]
+    ; CHECK-NEXT: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32)
+    ; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]]
+    ; CHECK-NEXT: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32)
+    ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]]
+    ; CHECK-NEXT: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32)
+    ; CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]]
+    ; CHECK-NEXT: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32)
+    ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]]
+    ; CHECK-NEXT: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32)
+    ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]]
+    ; CHECK-NEXT: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32)
+    ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]]
+    ; CHECK-NEXT: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32)
+    ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]]
+    ; CHECK-NEXT: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32)
+    ; CHECK-NEXT: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]]
+    ; CHECK-NEXT: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32)
+    ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]]
+    ; CHECK-NEXT: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32)
+    ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s68)
     %0:_(s17) = G_CONSTANT i17 0
     %1:_(s17) = G_CONSTANT i17 1
     %2:_(s17) = G_CONSTANT i17 2
@@ -987,12 +987,12 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_p3_s16_s16
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
-    ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     %0:_(s16) = G_CONSTANT i16 0
     %1:_(s16) = G_CONSTANT i16 1
     %2:_(p3) = G_MERGE_VALUES %0, %1
@@ -1007,14 +1007,14 @@ body: |
 
     ; CHECK-LABEL: name: test_merge_s32_s16_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: $vgpr0 = COPY [[OR]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -1031,20 +1031,20 @@ body: |
 
     ; CHECK-LABEL: name: test_merge_s48_s16_s16_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -1067,9 +1067,9 @@ body: |
 
     ; CHECK-LABEL: name: test_merge_s256_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[COPY1]](s128)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[COPY1]](s128)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s128) = COPY  $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(s256) = G_MERGE_VALUES %0, %1
@@ -1085,9 +1085,9 @@ body: |
 
     ; CHECK-LABEL: name: test_merge_s512_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s256), [[COPY1]](s256)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s256), [[COPY1]](s256)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512)
     %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(s256) = COPY $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %2:_(s512) = G_MERGE_VALUES %0, %1
@@ -1103,9 +1103,9 @@ body: |
 
     ; CHECK-LABEL: name: test_merge_s1024_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s512), [[COPY1]](s512)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s512), [[COPY1]](s512)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024)
     %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s512) = COPY $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
     %2:_(s1024) = G_MERGE_VALUES %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
index be56606a6ceee..09064e3dec7dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_SEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -24,9 +24,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s16_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s64) = G_SEXT %1
@@ -41,8 +41,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s16_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
-    ; CHECK: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -57,8 +57,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s24_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24
-    ; CHECK: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s24) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -73,8 +73,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_i1_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
-    ; CHECK: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -89,13 +89,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s32>) = G_SEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -109,19 +109,19 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<3 x s32>) = G_SEXT %1
@@ -136,18 +136,18 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v4s16_to_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
+    ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_SEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -161,11 +161,11 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v2s32_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_SEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -179,12 +179,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v3s32_to_v3s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
-    ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s64>) = G_SEXT %0
     S_NOP 0, implicit %1
@@ -199,13 +199,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_v4s32_to_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
-    ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
-    ; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s64>) = G_SEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -219,11 +219,11 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s8_to_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
-    ; CHECK: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s16) = G_SEXT %1
@@ -238,9 +238,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s8_to_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[SEXT_INREG]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s24)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s24) = G_SEXT %1
@@ -255,8 +255,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s7_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s7) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -271,8 +271,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s8_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SEXT_INREG]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -287,13 +287,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s96
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s96)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96)
     %0:_(s32) = COPY $vgpr0
     %1:_(s96) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -307,12 +307,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128)
     %0:_(s32) = COPY $vgpr0
     %1:_(s128) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -326,13 +326,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s160
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160)
     %0:_(s32) = COPY $vgpr0
     %1:_(s160) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -346,12 +346,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s192)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s192)
     %0:_(s32) = COPY $vgpr0
     %1:_(s192) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -365,13 +365,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s224
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -385,12 +385,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s256)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s256)
     %0:_(s32) = COPY $vgpr0
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -404,12 +404,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s512)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s512)
     %0:_(s32) = COPY $vgpr0
     %1:_(s512) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -423,13 +423,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s992
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -444,12 +444,12 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s1024)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s1024)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1024) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -463,10 +463,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s64_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -480,10 +480,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s64_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s192)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s192) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -497,10 +497,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s64_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -514,10 +514,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s64_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s512) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -531,10 +531,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s64_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s1024) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -548,13 +548,13 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s96_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[ASHR]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[ASHR]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s128) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -568,11 +568,11 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s128_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[ASHR]](s64), [[ASHR]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -586,76 +586,76 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s88
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16)
-    ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16)
-    ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C5]]
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C5]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
-    ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C3]](s16)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL7]]
-    ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL8]]
-    ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C3]](s16)
-    ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]]
-    ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
-    ; CHECK: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C3]](s16)
-    ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]]
-    ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32)
-    ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL11]]
-    ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
-    ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C1]](s32)
-    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL12]]
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV2]](s704)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC5]](s88)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16)
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
+    ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C5]]
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C5]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL7]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL8]]
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]]
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]]
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C3]](s16)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]]
+    ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL11]]
+    ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
+    ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+    ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL12]]
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV2]](s704)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC5]](s88)
     %0:_(s32) = COPY $vgpr0
     %1:_(s88) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -696,10 +696,10 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-LABEL: name: test_sext_s112_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[UV1]], 48
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[SEXT_INREG]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[UV1]], 48
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[SEXT_INREG]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s112) = G_TRUNC %0
     %2:_(s128) = G_SEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
index bab49a9849abf..6fa49513636e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
@@ -10,11 +10,11 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s64_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
-    ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
+    ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 4, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -28,11 +28,11 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s64_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
-    ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
+    ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 2, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -46,11 +46,11 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s64_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
-    ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
+    ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 1, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -64,10 +64,10 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s32_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
-    ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
     $vgpr0 = COPY %1
@@ -81,10 +81,10 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s32_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
-    ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
     $vgpr0 = COPY %1
@@ -98,10 +98,10 @@ body: |
 
     ; CI-LABEL: name: test_sextload_constant32bit_s32_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
-    ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6)
     $vgpr0 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir
index bdde9078aaeff..683ca816fcd6f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir
@@ -9,12 +9,12 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i32_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; VI-LABEL: name: test_sextload_flat_i32_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 0)
     $vgpr0 = COPY %1
@@ -27,12 +27,12 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i32_i16
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; SI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; VI-LABEL: name: test_sextload_flat_i32_i16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; VI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
      %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 0)
     $vgpr0 = COPY %1
@@ -45,12 +45,12 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i31_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; VI-LABEL: name: test_sextload_flat_i31_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -64,14 +64,14 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i64_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; VI-LABEL: name: test_sextload_flat_i64_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -84,14 +84,14 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i64_i16
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; SI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; VI-LABEL: name: test_sextload_flat_i64_i16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; VI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -104,14 +104,14 @@ body: |
 
     ; SI-LABEL: name: test_sextload_flat_i64_i32
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; SI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; VI-LABEL: name: test_sextload_flat_i64_i32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 0)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
index 1e0e13bd29018..1f70ceb475e32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
@@ -22,14 +22,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
-    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
-    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1)
     $vgpr0 = COPY %1
@@ -43,14 +43,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i7
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
-    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i7
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
-    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1)
     $vgpr0 = COPY %1
@@ -63,24 +63,24 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i24
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX8: $vgpr0 = COPY [[OR]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX8-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i24
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: $vgpr0 = COPY [[OR]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1)
     $vgpr0 = COPY %1
@@ -94,14 +94,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i30
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
-    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i30
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
-    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1)
     $vgpr0 = COPY %1
@@ -115,14 +115,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i31
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
-    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i31
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
-    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1)
     $vgpr0 = COPY %1
@@ -136,12 +136,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
     $vgpr0 = COPY %1
@@ -155,12 +155,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i32_i16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1)
     $vgpr0 = COPY %1
@@ -173,12 +173,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i31_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i31_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -192,14 +192,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i64_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; GFX6-LABEL: name: test_sextload_global_i64_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -212,14 +212,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i64_i16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; GFX6-LABEL: name: test_sextload_global_i64_i16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -232,14 +232,14 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_i64_i32
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; GFX6-LABEL: name: test_sextload_global_i64_i32
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -253,18 +253,18 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_s32_from_2_align1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_sextload_global_s32_from_2_align1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: $vgpr0 = COPY [[OR]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -278,20 +278,20 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_s64_from_2_align1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     ; GFX6-LABEL: name: test_sextload_global_s64_from_2_align1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -305,12 +305,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_v2i16_from_v2s8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>)
     ; GFX6-LABEL: name: test_sextload_global_v2i16_from_v2s8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1)
     $vgpr0 = COPY %1
@@ -324,12 +324,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_v2i32_from_v2s8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
     ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -343,12 +343,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_v2i32_from_v2s16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
     ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -362,12 +362,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_v2i64_from_v2s16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
     ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -381,12 +381,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_v2i64_from_v2s32
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
     ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s32
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s32>), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -400,12 +400,12 @@ body: |
 
     ; GFX8-LABEL: name: test_sextload_global_s128_8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128)
+    ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128)
     ; GFX6-LABEL: name: test_sextload_global_s128_8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128)
+    ; GFX6-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_SEXTLOAD %0 :: (load (s64), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir
index 54e6d68ce43ac..f630a8829522c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i32_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3)
     $vgpr0 = COPY %1
@@ -23,8 +23,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i32_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3)
     $vgpr0 = COPY %1
@@ -37,8 +37,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i31_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -52,9 +52,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i64_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -67,9 +67,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i64_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -82,9 +82,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_local_i64_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 3)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir
index 8a71cfcf7d25b..cf2f137b7189a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir
@@ -10,8 +10,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i32_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 5)
 
@@ -25,8 +25,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i32_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 5)
     $vgpr0 = COPY %1
@@ -39,8 +39,8 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i31_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -54,9 +54,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i64_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -69,9 +69,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i64_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -84,9 +84,9 @@ body: |
 
     ; CHECK-LABEL: name: test_sextload_private_i64_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 5)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index 3fb2bd67da106..f657c2866de9e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -20,32 +20,32 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; CI-LABEL: name: test_store_global_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; CI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; CI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; GFX9: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; GFX9-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s1) = G_TRUNC %1
@@ -60,28 +60,28 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s7_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; CI-LABEL: name: test_store_global_s7_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_s7_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s7_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s7) = G_TRUNC %1
@@ -96,20 +96,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; CI-LABEL: name: test_store_global_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_TRUNC %1
@@ -124,30 +124,30 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     ; CI-LABEL: name: test_store_global_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -162,20 +162,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; CI-LABEL: name: test_store_global_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; VI-LABEL: name: test_store_global_s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -190,20 +190,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -218,44 +218,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; CI-LABEL: name: test_store_global_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s24) = G_TRUNC %1
@@ -270,48 +270,48 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; CI-LABEL: name: test_store_global_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s24) = G_TRUNC %1
@@ -326,62 +326,62 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C2]](s32)
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     ; CI-LABEL: name: test_store_global_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
-    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
+    ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16)
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16)
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
-    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1)
+    ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s24) = G_TRUNC %1
@@ -396,28 +396,28 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s25_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; CI-LABEL: name: test_store_global_s25_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_store_global_s25_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s25_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s25) = G_TRUNC %1
@@ -456,50 +456,50 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; CI-LABEL: name: test_store_global_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store (s32), align 1, addrspace 1)
@@ -513,30 +513,30 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; CI-LABEL: name: test_store_global_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store (s32), align 2, addrspace 1)
@@ -550,20 +550,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; CI-LABEL: name: test_store_global_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_store_global_s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store (s32), align 4, addrspace 1)
@@ -577,52 +577,52 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; SI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
-    ; SI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; CI-LABEL: name: test_store_global_p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
-    ; VI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store (p3), align 1, addrspace 1)
@@ -636,32 +636,32 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p3_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; SI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; SI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; CI-LABEL: name: test_store_global_p3_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_p3_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; VI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p3_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store (p3), align 2, addrspace 1)
@@ -675,20 +675,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     ; CI-LABEL: name: test_store_global_p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     ; VI-LABEL: name: test_store_global_p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; GFX9-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store (p3), align 4, addrspace 1)
@@ -702,86 +702,86 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s48_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[COPY2]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[COPY2]](s32)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
     ; CI-LABEL: name: test_store_global_s48_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s48_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[C7]](s16)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[C7]](s16)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s48_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s48) = G_TRUNC %1
@@ -796,56 +796,56 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s48_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_s48_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
+    ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s48_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](s64), 32
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s48_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s48) = G_TRUNC %1
@@ -885,96 +885,96 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
-    ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
-    ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), align 1, addrspace 1)
@@ -988,46 +988,46 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), align 2, addrspace 1)
@@ -1041,20 +1041,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), align 4, addrspace 1)
@@ -1068,20 +1068,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     ; CI-LABEL: name: test_store_global_s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     ; VI-LABEL: name: test_store_global_s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), align 8, addrspace 1)
@@ -1095,20 +1095,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s64_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), align 16, addrspace 1)
@@ -1122,96 +1122,96 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p0_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_p0_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_p0_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
-    ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
-    ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p0_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p0), align 1, addrspace 1)
@@ -1225,46 +1225,46 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p0_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_p0_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_p0_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p0)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p0_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p0), align 2, addrspace 1)
@@ -1278,20 +1278,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p0_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_p0_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_p0_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p0_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p0), align 4, addrspace 1)
@@ -1305,20 +1305,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p0_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
     ; CI-LABEL: name: test_store_global_p0_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
     ; VI-LABEL: name: test_store_global_p0_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p0_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p0), align 8, addrspace 1)
@@ -1332,20 +1332,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p0_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_p0_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_p0_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p0_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p0) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p0), align 16, addrspace 1)
@@ -1359,96 +1359,96 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p999_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_p999_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_p999_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
-    ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
-    ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p999_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p999), align 1, addrspace 1)
@@ -1462,46 +1462,46 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p999_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_p999_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_p999_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](p999)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p999_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p999), align 2, addrspace 1)
@@ -1515,20 +1515,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p999_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_p999_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_p999_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p999_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p999), align 4, addrspace 1)
@@ -1542,20 +1542,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p999_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
     ; CI-LABEL: name: test_store_global_p999_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
     ; VI-LABEL: name: test_store_global_p999_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p999_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p999), align 8, addrspace 1)
@@ -1569,20 +1569,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p999_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_p999_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_p999_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_p999_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p999) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p999), align 16, addrspace 1)
@@ -1596,76 +1596,76 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), align 1, addrspace 1)
@@ -1679,44 +1679,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), align 2, addrspace 1)
@@ -1730,20 +1730,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), align 4, addrspace 1)
@@ -1757,20 +1757,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1)
@@ -1784,20 +1784,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), align 16, addrspace 1)
@@ -1811,80 +1811,80 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
-    ; SI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
-    ; SI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32)
-    ; SI: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+    ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
-    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
-    ; VI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32)
-    ; VI: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1)
@@ -1898,48 +1898,48 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p3_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
-    ; SI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; SI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
-    ; SI: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+    ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p3_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p3_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
-    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; VI: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
-    ; VI: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[PTRTOINT1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p3_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1)
@@ -1953,20 +1953,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1)
@@ -1980,20 +1980,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p3_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1)
@@ -2007,20 +2007,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p3_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p3_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p3_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p3_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1)
@@ -2034,82 +2034,82 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI: G_STORE [[BITCAST2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI: G_STORE [[BITCAST2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<4 x s16>), align 1, addrspace 1)
@@ -2123,50 +2123,50 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<4 x s16>), align 2, addrspace 1)
@@ -2180,20 +2180,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<4 x s16>), align 4, addrspace 1)
@@ -2207,20 +2207,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1)
@@ -2234,20 +2234,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s16_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s16_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<4 x s16>), align 16, addrspace 1)
@@ -2261,104 +2261,104 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
     ; CI-LABEL: name: test_store_global_v3s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v3s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 1, addrspace 1)
@@ -2372,60 +2372,60 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
     ; CI-LABEL: name: test_store_global_v3s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v3s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 2, addrspace 1)
@@ -2439,25 +2439,25 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1)
@@ -2471,25 +2471,25 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v3s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v3s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 8, addrspace 1)
@@ -2503,25 +2503,25 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1)
@@ -2535,124 +2535,124 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<4 x s32>), align 1, addrspace 1)
@@ -2666,70 +2666,70 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<4 x s32>), align 2, addrspace 1)
@@ -2743,20 +2743,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<4 x s32>), align 4, addrspace 1)
@@ -2770,20 +2770,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<4 x s32>), align 8, addrspace 1)
@@ -2797,20 +2797,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v4s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v4s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1)
@@ -2824,162 +2824,162 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C2]]
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]]
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]]
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32)
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]]
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32)
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]]
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32)
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]]
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C9]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C2]]
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]]
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32)
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]]
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32)
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]]
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32)
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]]
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32)
+    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]]
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C9]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s64_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
-    ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
-    ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16)
-    ; VI: G_STORE [[ANYEXT4]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16)
-    ; VI: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
-    ; VI: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16)
-    ; VI: G_STORE [[ANYEXT6]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64)
-    ; VI: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16)
-    ; VI: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64)
+    ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), align 1, addrspace 1)
@@ -2993,72 +2993,72 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s64_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s64_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), align 2, addrspace 1)
@@ -3072,20 +3072,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s64_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), align 4, addrspace 1)
@@ -3099,20 +3099,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s64_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), align 8, addrspace 1)
@@ -3126,20 +3126,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s64_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s64_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1)
@@ -3153,128 +3153,128 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 1, addrspace 1)
@@ -3288,74 +3288,74 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s16_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 2, addrspace 1)
@@ -3369,24 +3369,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 4, addrspace 1)
@@ -3400,24 +3400,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 8, addrspace 1)
@@ -3431,24 +3431,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s16_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s16_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1)
@@ -3462,128 +3462,128 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p0_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p0_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p0_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p0_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x p0>), align 1, addrspace 1)
@@ -3597,74 +3597,74 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p0_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p0_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p0_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p0_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x p0>), align 2, addrspace 1)
@@ -3678,24 +3678,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p0_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p0_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p0_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p0_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x p0>), align 4, addrspace 1)
@@ -3709,24 +3709,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p0_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p0_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p0_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p0_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x p0>), align 8, addrspace 1)
@@ -3740,24 +3740,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2p0_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; CI-LABEL: name: test_store_global_v2p0_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2p0_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2p0_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x p0>), align 16, addrspace 1)
@@ -3771,108 +3771,108 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s96_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
     ; CI-LABEL: name: test_store_global_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (s96), align 1, addrspace 1)
@@ -3886,64 +3886,64 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s96_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
     ; CI-LABEL: name: test_store_global_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (s96), align 2, addrspace 1)
@@ -3957,29 +3957,29 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (s96), align 4, addrspace 1)
@@ -3993,29 +3993,29 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s96_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (s96), align 8, addrspace 1)
@@ -4029,29 +4029,29 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
-    ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (s96), align 16, addrspace 1)
@@ -4065,128 +4065,128 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s128_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; CI-LABEL: name: test_store_global_s128_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s128_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s128_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), align 1, addrspace 1)
@@ -4200,74 +4200,74 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s128_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; CI-LABEL: name: test_store_global_s128_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s128_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s128_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), align 2, addrspace 1)
@@ -4281,24 +4281,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s128_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_s128_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s128_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), align 4, addrspace 1)
@@ -4312,24 +4312,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s128_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s128_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_s128_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s128_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), align 8, addrspace 1)
@@ -4343,24 +4343,24 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s128_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; CI-LABEL: name: test_store_global_s128_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_s128_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s128_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), align 16, addrspace 1)
@@ -4374,162 +4374,162 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1)
@@ -4543,96 +4543,96 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1)
@@ -4646,40 +4646,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1)
@@ -4693,40 +4693,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1)
@@ -4740,40 +4740,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1)
@@ -4786,166 +4786,166 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     ; SI-LABEL: name: test_store_global_v5p3_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5p3_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5p3_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5p3_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1)
@@ -4959,100 +4959,100 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5p3_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5p3_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5p3_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5p3_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1)
@@ -5066,44 +5066,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5p3_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5p3_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5p3_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5p3_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1)
@@ -5117,44 +5117,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5p3_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5p3_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5p3_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5p3_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1)
@@ -5168,44 +5168,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v5p3_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v5p3_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v5p3_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v5p3_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1)
@@ -5219,44 +5219,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v10s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v10s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v10s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v10s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<10 x s16>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1)
@@ -5270,160 +5270,160 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v11s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
-    ; SI: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
-    ; SI: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
-    ; SI: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; SI: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
+    ; SI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
+    ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
+    ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v11s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; CI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
-    ; CI: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
-    ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
-    ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
-    ; CI: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
-    ; CI: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; CI: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
+    ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
+    ; CI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
+    ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
+    ; CI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; CI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v11s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
-    ; VI: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
-    ; VI: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
-    ; VI: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; VI: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
+    ; VI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
+    ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
+    ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v11s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
-    ; GFX9: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
-    ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
-    ; GFX9: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
-    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
-    ; GFX9: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; GFX9: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
+    ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
+    ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
+    ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
+    ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; GFX9-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<11 x s16>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1)
@@ -5437,44 +5437,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v12s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v12s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v12s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v12s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<12 x s16>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1)
@@ -5488,166 +5488,166 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s160_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; CI-LABEL: name: test_store_global_s160_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s160_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s160_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (s160), align 1, addrspace 1)
@@ -5661,100 +5661,100 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s160_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; CI-LABEL: name: test_store_global_s160_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s160_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s160_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (s160), align 2, addrspace 1)
@@ -5768,44 +5768,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s160_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_s160_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s160_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (s160), align 4, addrspace 1)
@@ -5819,44 +5819,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s160_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s160_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_s160_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s160_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (s160), align 8, addrspace 1)
@@ -5870,44 +5870,44 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s160_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_s160_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s160_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s160_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     G_STORE %1, %0 :: (store (s160), align 16, addrspace 1)
@@ -5921,226 +5921,226 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; SI: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; SI: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; SI: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
-    ; SI: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; VI: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
-    ; VI: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 1, addrspace 1)
@@ -6154,128 +6154,128 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; SI: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; SI: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
-    ; SI: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
-    ; SI: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; VI: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; VI: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
-    ; VI: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
-    ; VI: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; VI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 2, addrspace 1)
@@ -6289,36 +6289,36 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 4, addrspace 1)
@@ -6332,36 +6332,36 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 8, addrspace 1)
@@ -6375,36 +6375,36 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 16, addrspace 1)
@@ -6418,40 +6418,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s128_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v2s128_align32
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s128_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v2s128_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<2 x s128>), align 32, addrspace 1)
@@ -6465,230 +6465,230 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; SI: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; SI: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; SI: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
-    ; SI: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; VI: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
-    ; VI: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 1, addrspace 1)
@@ -6702,132 +6702,132 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; SI: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; SI: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
-    ; SI: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
-    ; SI: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
-    ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; VI: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; VI: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
-    ; VI: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
-    ; VI: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>)
+    ; VI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 2, addrspace 1)
@@ -6841,40 +6841,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 4, addrspace 1)
@@ -6888,40 +6888,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 8, addrspace 1)
@@ -6935,40 +6935,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 16, addrspace 1)
@@ -6982,40 +6982,40 @@ body: |
 
     ; SI-LABEL: name: test_store_global_s256_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_s256_align32
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_s256_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s256_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (s256), align 32, addrspace 1)
@@ -7029,36 +7029,36 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v8s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v8s32_align32
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; CI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v8s32_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v8s32_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-    ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
-    ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+    ; GFX9-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 32, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
     G_STORE %1, %0 :: (store (<8 x s32>), align 32, addrspace 1)
@@ -7072,276 +7072,276 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v9s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; SI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; SI: G_STORE [[UV6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; SI: G_STORE [[UV7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
-    ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; SI: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
-    ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32)
-    ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32)
-    ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1)
-    ; SI: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1)
-    ; SI: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1)
-    ; SI: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
+    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; SI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32)
+    ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1)
     ; CI-LABEL: name: test_store_global_v9s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; CI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v9s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; VI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
-    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
-    ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
-    ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
-    ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
-    ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
-    ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
-    ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: G_STORE [[UV6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
-    ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
-    ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: G_STORE [[UV7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
-    ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
-    ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
-    ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
-    ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32)
-    ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32)
-    ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1)
-    ; VI: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1)
-    ; VI: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64)
-    ; VI: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1)
-    ; VI: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64)
-    ; VI: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
+    ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+    ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+    ; VI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
+    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64)
+    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64)
+    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64)
+    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
+    ; VI-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
+    ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64)
+    ; VI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v9s32_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; GFX9: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -7358,166 +7358,166 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v9s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; SI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; SI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
-    ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; SI: G_STORE [[UV8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; SI: G_STORE [[UV9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; SI: G_STORE [[UV10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; SI: G_STORE [[UV11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
-    ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
-    ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
+    ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
+    ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1)
     ; CI-LABEL: name: test_store_global_v9s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; CI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v9s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; VI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; VI: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
-    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
-    ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
-    ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
-    ; VI: G_STORE [[UV8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
-    ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
-    ; VI: G_STORE [[UV9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
-    ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
-    ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
-    ; VI: G_STORE [[UV10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
-    ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
-    ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
-    ; VI: G_STORE [[UV11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
-    ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
-    ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
-    ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
+    ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
+    ; VI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
+    ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
+    ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v9s32_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; GFX9: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -7534,68 +7534,68 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v9s32_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; SI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
     ; CI-LABEL: name: test_store_global_v9s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; CI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
     ; VI-LABEL: name: test_store_global_v9s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; VI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v9s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; GFX9: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -7612,68 +7612,68 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v9s32_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; SI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
     ; CI-LABEL: name: test_store_global_v9s32_align8
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; CI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v9s32_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; VI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v9s32_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; GFX9: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -7690,68 +7690,68 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v9s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; SI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
     ; CI-LABEL: name: test_store_global_v9s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; CI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+    ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
     ; VI-LABEL: name: test_store_global_v9s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; VI: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
     ; GFX9-LABEL: name: test_store_global_v9s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
-    ; GFX9: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
-    ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
-    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
+    ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
+    ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
+    ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
+    ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index aabb4889db6e3..2ba1f62d73240 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -18,12 +18,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_store_global_i32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store (s32), addrspace 1)
@@ -37,12 +37,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i64
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     ; VI-LABEL: name: test_store_global_i64
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s64), addrspace 1)
@@ -56,12 +56,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1)
     ; VI-LABEL: name: test_store_global_p1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p1), addrspace 1)
@@ -75,12 +75,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1)
     ; VI-LABEL: name: test_store_global_p4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (p4), addrspace 1)
@@ -94,12 +94,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_p3
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     ; VI-LABEL: name: test_store_global_p3
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store (p3), addrspace 1)
@@ -113,12 +113,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (<2 x s32>), addrspace 1)
@@ -132,12 +132,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     G_STORE %1, %0 :: (store (<2 x s16>), addrspace 1)
@@ -151,17 +151,17 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1)
@@ -175,14 +175,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s8), addrspace 1)
@@ -196,14 +196,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s16), addrspace 1)
@@ -217,28 +217,28 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s16), addrspace 1, align 1)
@@ -252,14 +252,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s32), addrspace 1)
@@ -273,24 +273,24 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s32_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s32), addrspace 1, align 2)
@@ -304,50 +304,50 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s64_to_s32_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store (s32), addrspace 1, align 1)
@@ -361,14 +361,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s128_to_s16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
-    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s128_to_s16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
-    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s16), addrspace 1)
@@ -382,14 +382,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_s128_to_s8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_s128_to_s8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), addrspace 1)
@@ -403,18 +403,18 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_i1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
-    ; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s1) = G_TRUNC %1
@@ -429,12 +429,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_i8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_TRUNC %1
@@ -449,12 +449,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; VI-LABEL: name: test_store_global_i16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
@@ -469,19 +469,19 @@ body: |
 
     ; SI-LABEL: name: test_store_global_96
     ; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
-    ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
-    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
+    ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
+    ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
+    ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
     ; VI-LABEL: name: test_store_global_96
     ; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
-    ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
-    ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY1]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
+    ; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY1]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(p1) = COPY $vgpr3_vgpr4
 
@@ -496,14 +496,14 @@ body: |
 
     ; SI-LABEL: name: test_store_global_i128
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     ; VI-LABEL: name: test_store_global_i128
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
-    ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
+    ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (s128), addrspace 1)
@@ -517,12 +517,12 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s64
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s64
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<2 x s64>), addrspace 1)
@@ -537,20 +537,20 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 1)
@@ -565,33 +565,33 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 2)
@@ -606,33 +606,33 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v2s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 4)
@@ -647,52 +647,52 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s16)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY3]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s16)
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s32)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY3]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
-    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[COPY2]], [[C1]](s16)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[COPY2]], [[C1]](s16)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1
@@ -708,41 +708,41 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16)
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1
@@ -758,66 +758,66 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v3s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32)
-    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; SI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
-    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1)
+    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1
@@ -833,32 +833,32 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s8_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; SI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; SI: G_STORE [[UV3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; SI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; VI: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: G_STORE [[UV3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; VI-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     %2:_(<4 x s8>) = G_TRUNC %1
@@ -874,54 +874,54 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
-    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; SI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; SI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
-    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
-    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
-    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
-    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     %2:_(<4 x s8>) = G_TRUNC %1
@@ -937,42 +937,42 @@ body: |
 
     ; SI-LABEL: name: test_store_global_v4s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; SI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_store_global_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
-    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
-    ; VI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     %2:_(<4 x s8>) = G_TRUNC %1
@@ -988,12 +988,12 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_v2s8_to_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF
-    ; SI: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF
+    ; SI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_v2s8_to_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF
-    ; VI: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF
+    ; VI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<2 x s4>), addrspace 1, align 1)
@@ -1008,14 +1008,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_v3s8_to_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; SI: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
+    ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; VI: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
+    ; VI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1
@@ -1031,14 +1031,14 @@ body: |
 
     ; SI-LABEL: name: test_truncstore_global_v3s8_to_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; SI: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1)
+    ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
+    ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; VI: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1)
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
+    ; VI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index 3f0222b1351b1..57436036720c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -7,9 +7,9 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_s32_s64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
     $vgpr0 = COPY %1(s32)
@@ -23,9 +23,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s32_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr21 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr21 = COPY [[UV1]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
     $vgpr0 = COPY %1
@@ -39,11 +39,11 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s16_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
     %3:_(s32) = G_ANYEXT %1
@@ -58,14 +58,14 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_s16_v3s16
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(s16), %2:_(s16),  %3:_(s16) = G_UNMERGE_VALUES %0
     %4:_(s32) = G_ANYEXT %1
@@ -84,16 +84,16 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s16_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[BITCAST1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0
     %5:_(s32) = G_ANYEXT %1
@@ -113,20 +113,20 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s16_v6s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[BITCAST]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[BITCAST1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr4 = COPY [[BITCAST2]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[BITCAST2]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR2]](s32)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
     %7:_(s32) = G_ANYEXT %1
@@ -151,10 +151,10 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s8_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
@@ -171,16 +171,16 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s8_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0
     %5:_(s32) = G_ANYEXT %1
@@ -200,25 +200,25 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s8_s48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR2]](s32)
-    ; CHECK: $vgpr4 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR3]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR3]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_TRUNC %0
     %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8) = G_UNMERGE_VALUES %1
@@ -243,12 +243,12 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s16_s48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_TRUNC %0
     %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
@@ -267,31 +267,31 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s8_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR2]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR3]](s32)
-    ; CHECK: $vgpr4 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR4]](s32)
-    ; CHECK: $vgpr6 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr7 = COPY [[LSHR5]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32)
+    ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0
     %10:_(s32) = G_ANYEXT %1
@@ -319,31 +319,31 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s8_p1
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR2]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR3]](s32)
-    ; CHECK: $vgpr4 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR4]](s32)
-    ; CHECK: $vgpr6 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr7 = COPY [[LSHR5]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR3]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32)
+    ; CHECK-NEXT: $vgpr6 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr7 = COPY [[LSHR5]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0
     %10:_(s32) = G_ANYEXT %1
@@ -371,63 +371,63 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s4_p1
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
-    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32)
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
-    ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C3]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
-    ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[C4]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR2]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR3]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR4]](s32)
-    ; CHECK: $vgpr4 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR5]](s32)
-    ; CHECK: $vgpr6 = COPY [[LSHR6]](s32)
-    ; CHECK: $vgpr7 = COPY [[LSHR7]](s32)
-    ; CHECK: $vgpr8 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr9 = COPY [[LSHR8]](s32)
-    ; CHECK: $vgpr10 = COPY [[LSHR9]](s32)
-    ; CHECK: $vgpr11 = COPY [[LSHR10]](s32)
-    ; CHECK: $vgpr12 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr13 = COPY [[LSHR11]](s32)
-    ; CHECK: $vgpr14 = COPY [[LSHR12]](s32)
-    ; CHECK: $vgpr15 = COPY [[LSHR13]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]]
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32)
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32)
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32)
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32)
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C3]](s32)
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+    ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[C4]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR3]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR4]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR5]](s32)
+    ; CHECK-NEXT: $vgpr6 = COPY [[LSHR6]](s32)
+    ; CHECK-NEXT: $vgpr7 = COPY [[LSHR7]](s32)
+    ; CHECK-NEXT: $vgpr8 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr9 = COPY [[LSHR8]](s32)
+    ; CHECK-NEXT: $vgpr10 = COPY [[LSHR9]](s32)
+    ; CHECK-NEXT: $vgpr11 = COPY [[LSHR10]](s32)
+    ; CHECK-NEXT: $vgpr12 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr13 = COPY [[LSHR11]](s32)
+    ; CHECK-NEXT: $vgpr14 = COPY [[LSHR12]](s32)
+    ; CHECK-NEXT: $vgpr15 = COPY [[LSHR13]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s4), %2:_(s4), %3:_(s4), %4:_(s4), %5:_(s4), %6:_(s4), %7:_(s4), %8:_(s4), %9:_(s4), %10:_(s4), %11:_(s4), %12:_(s4), %13:_(s4), %14:_(s4), %15:_(s4), %16:_(s4) = G_UNMERGE_VALUES %0
     %17:_(s32) = G_ANYEXT %1
@@ -471,14 +471,14 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s16_p1
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0
     %5:_(s32) = G_ANYEXT %1
@@ -498,9 +498,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s32_p1
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](p1)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
     $vgpr0 = COPY %1
@@ -514,10 +514,10 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s16_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
     %3:_(s32) = G_ANYEXT %1
@@ -533,11 +533,11 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s16_p3
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[PTRTOINT]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
     %3:_(s32) = G_ANYEXT %1
@@ -553,17 +553,17 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s8_p3
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
-    ; CHECK: $vgpr0 = COPY [[PTRTOINT]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[PTRTOINT]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0
     %5:_(s32) = G_ANYEXT %1
@@ -584,14 +584,14 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: test_unmerge_s16_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[UV1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR1]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0
     %5:_(s32) = G_ANYEXT %1
@@ -611,13 +611,13 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s1_s3
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s3) = G_TRUNC %0
     %2:_(s1), %3:_(s1), %4:_(s1) = G_UNMERGE_VALUES %1
@@ -636,28 +636,28 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: test_unmerge_s1_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C3]](s32)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C4]](s32)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C5]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C6]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR2]](s32)
-    ; CHECK: $vgpr4 = COPY [[LSHR3]](s32)
-    ; CHECK: $vgpr5 = COPY [[LSHR4]](s32)
-    ; CHECK: $vgpr6 = COPY [[LSHR5]](s32)
-    ; CHECK: $vgpr7 = COPY [[LSHR6]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C3]](s32)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C4]](s32)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C5]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C6]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: $vgpr4 = COPY [[LSHR3]](s32)
+    ; CHECK-NEXT: $vgpr5 = COPY [[LSHR4]](s32)
+    ; CHECK-NEXT: $vgpr6 = COPY [[LSHR5]](s32)
+    ; CHECK-NEXT: $vgpr7 = COPY [[LSHR6]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s1), %3:_(s1), %4:_(s1), %5:_(s1), %6:_(s1), %7:_(s1), %8:_(s1), %9:_(s1) = G_UNMERGE_VALUES %1
@@ -687,9 +687,9 @@ body: |
     liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: test_unmerge_s128_v2s128
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128)
-    ; CHECK: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128)
+    ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128)
     %0:_(<2 x s128>) = COPY  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(s128), %2:_(s128) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -704,9 +704,9 @@ body: |
     liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK-LABEL: name: test_unmerge_s128_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128)
-    ; CHECK: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UV]](s128)
+    ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV1]](s128)
     %0:_(s256) = COPY  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(s128), %2:_(s128) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -722,9 +722,9 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s256_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](s512)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256)
-    ; CHECK: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](s512)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256)
+    ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256)
     %0:_(s512) = COPY  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s256), %2:_(s256) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -740,9 +740,9 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s256_v2s256
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s256>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; CHECK: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](<2 x s256>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256)
-    ; CHECK: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s256), [[UV1:%[0-9]+]]:_(s256) = G_UNMERGE_VALUES [[COPY]](<2 x s256>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[UV]](s256)
+    ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV1]](s256)
     %0:_(<2 x s256>) = COPY  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s256), %2:_(s256) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -758,9 +758,9 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s512_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; CHECK: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](s1024)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512)
-    ; CHECK: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](s1024)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512)
+    ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512)
     %0:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
     %1:_(s512), %2:_(s512) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -776,9 +776,9 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s512_v2s512
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; CHECK: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](<2 x s512>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512)
-    ; CHECK: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s512), [[UV1:%[0-9]+]]:_(s512) = G_UNMERGE_VALUES [[COPY]](<2 x s512>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[UV]](s512)
+    ; CHECK-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[UV1]](s512)
     %0:_(<2 x s512>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
     %1:_(s512), %2:_(s512) = G_UNMERGE_VALUES %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -791,11 +791,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_v2s1
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
-    ; CHECK: S_NOP 0, implicit [[TRUNC]](s1)
-    ; CHECK: S_NOP 0, implicit [[TRUNC1]](s1)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1)
+    ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC1]](s1)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
     %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0
     S_NOP 0, implicit %1
@@ -810,16 +810,16 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s8_v4s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
-    ; CHECK: $vgpr3 = COPY [[LSHR2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<4 x s8>) = G_BITCAST %0
     %2:_(s8), %3:_(s8),  %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
@@ -841,13 +841,13 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s8_v3s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK: $vgpr2 = COPY [[LSHR1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s24) = G_TRUNC %0
     %2:_(<3 x s8>) = G_BITCAST %1
@@ -868,10 +868,10 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_s8_v2s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
-    ; CHECK: $vgpr1 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(<2 x s8>) = G_BITCAST %1
@@ -891,13 +891,13 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_v3s32_v12s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY1]](<6 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
-    ; CHECK: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
-    ; CHECK: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x s32>)
-    ; CHECK: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY1]](<6 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x s32>)
     %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     %1:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
     %2:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1
@@ -917,27 +917,27 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_v3s8_v12s8
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR2]](s32), [[UV1]](s32), [[LSHR3]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[LSHR5]](s32), [[UV2]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-    ; CHECK: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>)
-    ; CHECK: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>)
-    ; CHECK: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[LSHR1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR2]](s32), [[UV1]](s32), [[LSHR3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[LSHR5]](s32), [[UV2]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<12 x s8>) = G_BITCAST %0
     %2:_(<3 x s8>), %3:_(<3 x s8>), %4:_(<3 x s8>), %5:_(<3 x s8>) = G_UNMERGE_VALUES %1
@@ -960,33 +960,33 @@ body: |
 
     ; CHECK-LABEL: name: test_unmerge_v3s16_v12s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
-    ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
-    ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32)
-    ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-    ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-    ; CHECK: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>)
-    ; CHECK: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>)
-    ; CHECK: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR1]](s32), [[BITCAST3]](s32), [[LSHR2]](s32)
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR3]](s32), [[BITCAST5]](s32)
+    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR4]](s32), [[BITCAST7]](s32), [[LSHR5]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR1]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR3]](<3 x s32>)
     %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     %1:_(<3 x s16>), %2:_(<3 x s16>), %3:_(<3 x s16>), %4:_(<3 x s16>) = G_UNMERGE_VALUES %0
     %5:_(<3 x s32>) = G_ANYEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index 1c603e6a065b5..a0a2e3ee83032 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -9,9 +9,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0 = COPY [[XOR]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_XOR %0, %1
@@ -26,9 +26,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s1
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: S_NOP 0, implicit [[XOR]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_CONSTANT i32 0
@@ -46,22 +46,22 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v2s1
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
-    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
-    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]]
-    ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]]
-    ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]]
+    ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]]
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -80,26 +80,26 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v3s1
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
-    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
-    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
-    ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]]
-    ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]]
-    ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]]
-    ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP3]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP4]]
-    ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP5]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+    ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]]
+    ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]]
+    ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]]
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP3]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP4]]
+    ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP5]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
@@ -118,9 +118,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[XOR]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_XOR %0, %1
@@ -135,16 +135,16 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s96
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
-    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[XOR1]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
+    ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+    ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
+    ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[XOR1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s96) = G_XOR %0, %1
@@ -159,13 +159,13 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(s128) = G_XOR %0, %1
@@ -180,9 +180,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s7
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0 = COPY [[XOR]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -200,9 +200,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0 = COPY [[XOR]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s8) = G_TRUNC %0
@@ -220,12 +220,12 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -243,12 +243,12 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -266,9 +266,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_s48
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[XOR]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s48) = G_TRUNC %0
@@ -286,9 +286,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[XOR]](<2 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(<2 x s32>) = G_XOR %0, %1
@@ -303,20 +303,20 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>)
-    ; CHECK: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = G_XOR %0, %1
@@ -331,13 +331,13 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV]], [[UV2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV1]], [[UV3]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV]], [[UV2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV1]], [[UV3]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s32>) = G_XOR %0, %1
@@ -351,25 +351,25 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v5s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
-    ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
-    ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
-    ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
-    ; CHECK: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
-    ; CHECK: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
-    ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+    ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
+    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
     %0:_(<5 x s32>) = G_IMPLICIT_DEF
     %1:_(<5 x s32>) = G_IMPLICIT_DEF
     %2:_(<5 x s32>) = G_XOR %0, %1
@@ -386,13 +386,13 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[XOR]](s64), [[XOR1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[XOR]](s64), [[XOR1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_XOR %0, %1
@@ -407,9 +407,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0 = COPY [[XOR]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[XOR]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_XOR %0, %1
@@ -423,41 +423,41 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
     ; CHECK-LABEL: name: test_xor_v3s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
-    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]]
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]]
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
@@ -476,9 +476,9 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
-    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[COPY]], [[COPY1]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[XOR]](<4 x s16>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[XOR]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s16>) = G_XOR %0, %1
@@ -492,54 +492,54 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v5s16
     ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
-    ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
-    ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
-    ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
-    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]]
-    ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
-    ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
-    ; CHECK: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]]
-    ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
-    ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
-    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
-    ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
-    ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]]
+    ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
+    ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]]
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
+    ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
+    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
     %2:_(<5 x s16>) = G_XOR %0, %1
@@ -555,33 +555,33 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v3s8
     ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
-    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32)
-    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
-    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
-    ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32)
-    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
-    ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
-    ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
-    ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
-    ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
+    ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32)
+    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
+    ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32)
+    ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
+    ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
+    ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
     %0:_(<3 x s8>) = G_IMPLICIT_DEF
     %1:_(<3 x s8>) = G_IMPLICIT_DEF
     %2:_(<3 x s8>) = G_XOR %0, %1
@@ -596,15 +596,15 @@ body: |
 
     ; CHECK-LABEL: name: test_xor_v4s8
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]]
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]]
-    ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]]
-    ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[UV3]], [[UV7]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32), [[XOR3]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]]
+    ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]]
+    ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]]
+    ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[UV3]], [[UV7]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32), [[XOR3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(<4 x s8>) = G_IMPLICIT_DEF
     %2:_(<4 x s8>) = G_XOR %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
index f3f9d22a36390..5cac993556522 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_ZEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -24,10 +24,10 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s16_to_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s64) = G_ZEXT %1
@@ -42,9 +42,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s16_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1
@@ -59,9 +59,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s24_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s24) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1
@@ -76,12 +76,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s96
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
     %0:_(s32) = COPY $vgpr0
     %1:_(s96) = G_ZEXT %0
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -94,7 +94,7 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_i1_to_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
     %0:_(s1) = G_CONSTANT i1 0
     %1:_(s32) = G_ZEXT %0
     $vgpr0 = COPY %1
@@ -107,7 +107,7 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_i1_to_i64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[C]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[C]](s64)
     %0:_(s1) = G_CONSTANT i1 0
     %1:_(s64) = G_ZEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -121,14 +121,14 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v2s16_to_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s32>) = G_ZEXT %0
     $vgpr0_vgpr1 = COPY %1
@@ -142,20 +142,20 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_EXTRACT %0, 0
     %2:_(<3 x s32>) = G_ZEXT %1
@@ -170,19 +170,19 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v4s16_to_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s32>) = G_ZEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -196,11 +196,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v2s32_to_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_ZEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -214,12 +214,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v3s32_to_v3s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64)
-    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64)
+    ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s64>) = G_ZEXT %0
     S_NOP 0, implicit %1
@@ -234,13 +234,13 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_v4s32_to_v4s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s64>) = G_ZEXT %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -254,10 +254,10 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; CHECK: S_ENDPGM 0, implicit [[AND]](s16)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s16)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s16) = G_ZEXT %1
@@ -272,10 +272,10 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[AND]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s24)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[AND]](s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s24)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s24) = G_ZEXT %1
@@ -291,9 +291,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s7_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: S_ENDPGM 0, implicit [[AND]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s7) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1
@@ -308,9 +308,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s8_to_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK: S_ENDPGM 0, implicit [[AND]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s8) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1
@@ -325,11 +325,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s128)
     %0:_(s32) = COPY $vgpr0
     %1:_(s128) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -343,12 +343,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s160
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s160)
     %0:_(s32) = COPY $vgpr0
     %1:_(s160) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -363,11 +363,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s192)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s192)
     %0:_(s32) = COPY $vgpr0
     %1:_(s192) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -381,12 +381,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s224
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -400,11 +400,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s256)
     %0:_(s32) = COPY $vgpr0
     %1:_(s256) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -418,11 +418,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s512)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s512)
     %0:_(s32) = COPY $vgpr0
     %1:_(s512) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -436,12 +436,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s992
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s224)
     %0:_(s32) = COPY $vgpr0
     %1:_(s224) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -455,11 +455,11 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s1024)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV1]](s1024)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1024) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -473,9 +473,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s64_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -489,9 +489,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s64_to_s192
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s192)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s192)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s192) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -505,9 +505,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s64_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -521,9 +521,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s64_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s512)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s512) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -537,9 +537,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s64_to_s1024
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s1024)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s1024) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -553,12 +553,12 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s96_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[C]](s32)
-    ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV2]](s128)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s128) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -572,10 +572,10 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s128_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[C]](s64), [[C]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s256) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -589,44 +589,44 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s32_to_s88
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
-    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-    ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16)
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32)
-    ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](s88)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16)
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](s88)
     %0:_(s32) = COPY $vgpr0
     %1:_(s88) = G_ZEXT %0
     S_ENDPGM 0, implicit %1
@@ -654,114 +654,114 @@ body: |
 
     ; CHECK-LABEL: name: test_zext_s2_to_s112
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
-    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
-    ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
-    ; CHECK: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND9]](s64)
-    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND10]](s64)
-    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32)
-    ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32)
-    ; CHECK: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
-    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
-    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
-    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
-    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
-    ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
-    ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C2]](s32)
-    ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
-    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL3]]
-    ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32)
-    ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C3]]
-    ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
-    ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL9]]
-    ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C3]]
-    ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
-    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
-    ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]]
-    ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
-    ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
-    ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]]
-    ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]]
-    ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]]
-    ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32)
-    ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]]
-    ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
-    ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
-    ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]]
-    ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
-    ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]]
-    ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]]
-    ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
-    ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32)
-    ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]]
-    ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
-    ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]]
-    ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]]
-    ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
-    ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CHECK: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s112)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+    ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND9]](s64)
+    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND10]](s64)
+    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32)
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32)
+    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+    ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
+    ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
+    ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL3]]
+    ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32)
+    ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C3]]
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL9]]
+    ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
+    ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C3]]
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
+    ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]]
+    ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]]
+    ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]]
+    ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]]
+    ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]]
+    ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
+    ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
+    ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]]
+    ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]]
+    ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]]
+    ; CHECK-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
+    ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]]
+    ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
+    ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]]
+    ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]]
+    ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
+    ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
     %0:_(s32) = COPY $vgpr0
     %1:_(s2) = G_TRUNC %0
     %2:_(s112) = G_ZEXT %1
@@ -775,13 +775,13 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-LABEL: name: test_zext_s112_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s112) = G_TRUNC %0
     %2:_(s128) = G_ZEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
index 7b454aea94a44..5bf5b4cf143c3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
@@ -11,11 +11,11 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s64_s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -29,11 +29,11 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s64_s32_align2
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -47,11 +47,11 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s64_s32_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
+    ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p6) = COPY $sgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6)
     $vgpr0_vgpr1 = COPY %1
@@ -65,10 +65,10 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s32_s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
-    ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
     $vgpr0 = COPY %1
@@ -82,10 +82,10 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s32_s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
-    ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
     $vgpr0 = COPY %1
@@ -99,10 +99,10 @@ body: |
 
     ; CI-LABEL: name: test_zextload_constant32bit_s32_s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
-    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
-    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
-    ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
-    ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
+    ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p6) = COPY $sgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6)
     $vgpr0 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir
index 417b271c494b0..044d5fc4405bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir
@@ -9,12 +9,12 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i32_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; VI-LABEL: name: test_zextload_flat_i32_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0)
     $vgpr0 = COPY %1
@@ -27,12 +27,12 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i32_i16
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; SI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; VI-LABEL: name: test_zextload_flat_i32_i16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; VI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
      %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0)
     $vgpr0 = COPY %1
@@ -45,12 +45,12 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i31_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; VI-LABEL: name: test_zextload_flat_i31_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0)
     %2:_(s32) = G_ANYEXT %1
@@ -64,14 +64,14 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i64_i8
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; VI-LABEL: name: test_zextload_flat_i64_i8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -84,14 +84,14 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i64_i16
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; VI-LABEL: name: test_zextload_flat_i64_i16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16))
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -104,14 +104,14 @@ body: |
 
     ; SI-LABEL: name: test_zextload_flat_i64_i32
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; VI-LABEL: name: test_zextload_flat_i64_i32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 0)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
index ac846e122251a..132fdb146811a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
@@ -22,14 +22,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1
-    ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1
+    ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1
-    ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1
+    ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1)
     $vgpr0 = COPY %1
@@ -43,14 +43,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i7
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7
-    ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7
+    ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i7
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7
-    ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7
+    ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1)
     $vgpr0 = COPY %1
@@ -64,24 +64,24 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i24
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX8: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX8: $vgpr0 = COPY [[OR]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX8-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX8-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX8-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i24
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: $vgpr0 = COPY [[OR]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1)
     $vgpr0 = COPY %1
@@ -95,14 +95,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i30
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30
-    ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30
+    ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i30
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30
-    ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30
+    ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1)
     $vgpr0 = COPY %1
@@ -116,14 +116,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i31
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31
-    ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31
+    ; GFX8-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i31
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31
-    ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31
+    ; GFX6-NEXT: $vgpr0 = COPY [[ASSERT_ZEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1)
     $vgpr0 = COPY %1
@@ -137,12 +137,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1)
     $vgpr0 = COPY %1
@@ -155,12 +155,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i32_i16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i32_i16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1)
     $vgpr0 = COPY %1
@@ -173,12 +173,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i31_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_zextload_global_i31_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -192,14 +192,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i64_i8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; GFX6-LABEL: name: test_zextload_global_i64_i8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -212,14 +212,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i64_i16
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; GFX6-LABEL: name: test_zextload_global_i64_i16
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -232,14 +232,14 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_i64_i32
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; GFX6-LABEL: name: test_zextload_global_i64_i32
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -253,18 +253,18 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_s32_from_2_align1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     ; GFX6-LABEL: name: test_zextload_global_s32_from_2_align1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: $vgpr0 = COPY [[OR]](s32)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
     $vgpr0 = COPY %1
@@ -278,20 +278,20 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_s64_from_2_align1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     ; GFX6-LABEL: name: test_zextload_global_s64_from_2_align1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
-    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX6-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -305,12 +305,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_v2i16_from_2
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>)
     ; GFX6-LABEL: name: test_zextload_global_v2i16_from_2
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load (<2 x s8>), addrspace 1)
     $vgpr0 = COPY %1
@@ -324,12 +324,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_v2i32_from_2
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
     ; GFX6-LABEL: name: test_zextload_global_v2i32_from_2
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (<2 x s8>), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -343,12 +343,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_v2i32_from_4
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
     ; GFX6-LABEL: name: test_zextload_global_v2i32_from_4
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (<2 x s16>), addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -362,12 +362,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_v2i64_from_4
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
     ; GFX6-LABEL: name: test_zextload_global_v2i64_from_4
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (<2 x s16>), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -381,12 +381,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_v2i64_from_8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
     ; GFX6-LABEL: name: test_zextload_global_v2i64_from_8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (<2 x s32>), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -400,12 +400,12 @@ body: |
 
     ; GFX8-LABEL: name: test_zextload_global_s128_8
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128)
+    ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128)
     ; GFX6-LABEL: name: test_zextload_global_s128_8
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128)
+    ; GFX6-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_ZEXTLOAD %0 :: (load (s64), addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir
index 49f1d5d5f601a..78ac3aadc0d64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir
@@ -9,8 +9,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i32_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3)
     $vgpr0 = COPY %1
@@ -23,8 +23,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i32_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3)
     $vgpr0 = COPY %1
@@ -37,8 +37,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i31_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -52,9 +52,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i64_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -67,9 +67,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i64_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -82,9 +82,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_local_i64_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 3)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir
index c3fb275dc32fd..bd612251267ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir
@@ -10,8 +10,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i32_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5)
     $vgpr0 = COPY %1
@@ -24,8 +24,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i32_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5)
     $vgpr0 = COPY %1
@@ -38,8 +38,8 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i31_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -53,9 +53,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i64_i8
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -68,9 +68,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i64_i16
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -83,9 +83,9 @@ body: |
 
     ; CHECK-LABEL: name: test_zextload_private_i64_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 5)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
index 067158e1c66d2..573094c7335b9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
@@ -11,10 +11,11 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: sextload_from_inreg
     ; CHECK: liveins: $vgpr0_vgpr1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[LOAD]], 8
-    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[LOAD]], 8
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1)
     %2:_(s64) = G_SEXT_INREG %1, 8


        


More information about the llvm-commits mailing list